ROX-33198: Instrument inode tracking on file open lsm hook#391
ROX-33198: Instrument inode tracking on file open lsm hook#391JoukoVirtanen wants to merge 14 commits intomainfrom
Conversation
fact/src/host_scanner.rs
Outdated
| return Ok(()); | ||
| } | ||
|
|
||
| let host_path = host_info::prepend_host_mount(event.get_filename()); |
There was a problem hiding this comment.
The initial plan was to look up the parent, get its file path, and then add the file name. However, it seems possible to get the entire path from the event.
There was a problem hiding this comment.
This only works if the file is not mounted to a different directory. Try monitoring /etc/monitored on your host, then run something like docker run --rm -it -v /etc/monitored:/in-container fedora:43 and access some files in /in-container. You should see the events have a filename in /in-container with a host path of /etc/monitored.
There was a problem hiding this comment.
This has now been changed.
tests/test_inode_tracking.py
Outdated
| # Wait for creation event | ||
| process = Process.from_proc() | ||
| creation_event = Event(process=process, event_type=EventType.CREATION, | ||
| file=fut, host_path='') |
There was a problem hiding this comment.
Should host_path be populated here.
There was a problem hiding this comment.
I have removed this file.
tests/test_inode_tracking.py
Outdated
| """ | ||
| cwd = os.getcwd() | ||
| config = { | ||
| 'paths': [f'{monitored_dir}/**', '/mounted/**', '/container-dir/**'], |
There was a problem hiding this comment.
I am not sure what globbing to use here.
fact/src/host_scanner.rs
Outdated
| if path.is_file() || path.is_dir() { | ||
| self.metrics.scan_inc(ScanLabels::FileScanned); |
There was a problem hiding this comment.
Would be nice to have separate metrics here, so we can check how many files and directories we are tracking exactly.
fact/src/host_scanner.rs
Outdated
| return Ok(()); | ||
| } | ||
|
|
||
| let host_path = host_info::prepend_host_mount(event.get_filename()); |
There was a problem hiding this comment.
This only works if the file is not mounted to a different directory. Try monitoring /etc/monitored on your host, then run something like docker run --rm -it -v /etc/monitored:/in-container fedora:43 and access some files in /in-container. You should see the events have a filename in /in-container with a host path of /etc/monitored.
fact/src/host_scanner.rs
Outdated
| .with_context(|| format!("Failed to add creation event entry for {}", host_path.display()))?; | ||
| } else { | ||
| debug!("Creation event for non-existent file: {}", host_path.display()); | ||
| self.metrics.scan_inc(ScanLabels::FileRemoved); |
There was a problem hiding this comment.
It's probably worth it to add a separate label for this case, missing a host file is a separate condition IMO.
There was a problem hiding this comment.
I think this is outdated now.
| if event.is_creation() { | ||
| if let Err(e) = self.handle_creation_event(&event) { |
There was a problem hiding this comment.
If we change edition in fact/Cargo.toml to 2024 we can rewrite this like:
if event.is_creation() &&
let Err(e) = self.handle_creation_event(&event) {There was a problem hiding this comment.
I think formatting checks started to fail after I updated edition. Perhaps that change could be moved to a different PR.
There was a problem hiding this comment.
I have a PR to update edition and make the formatting changes here #413
fact/src/host_scanner.rs
Outdated
|
|
||
| /// Handle file creation events by adding new inodes to the map. | ||
| fn handle_creation_event(&self, event: &Event) -> anyhow::Result<()> { | ||
| if self.get_host_path(Some(event.get_inode())).is_some() { |
There was a problem hiding this comment.
This condition can never be true, the events coming from the kernel don't have the host path, I would remove the block.
There was a problem hiding this comment.
This has been changed.
tests/test_inode_tracking.py
Outdated
There was a problem hiding this comment.
Why do we need a separate test for this? I assume you should be able to tweak the tests in test_file_open.py to test your changes.
There was a problem hiding this comment.
This file has now been removed.
… can now use untrusted pointers
| } | ||
|
|
||
| unsigned long magic = inode->i_sb->s_magic; | ||
| unsigned long magic = BPF_CORE_READ(inode, i_sb, s_magic); |
There was a problem hiding this comment.
inode_to_key needs to be able to use untrusted pointers, so that it can get the key for a parent inode. To get it to be able to use untrusted pointers, BPF_CORE_READ is used.
| // For file creation events, check if the parent directory is being | ||
| // monitored. If so, add the new file's inode to the tracked set. | ||
| if (event_type == FILE_ACTIVITY_CREATION) { | ||
| struct dentry* parent_dentry = BPF_CORE_READ(file, f_path.dentry, d_parent); | ||
| if (parent_dentry) { | ||
| struct inode* parent_inode = BPF_CORE_READ(parent_dentry, d_inode); | ||
| inode_key_t parent_key = inode_to_key(parent_inode); | ||
|
|
||
| if (inode_is_monitored(inode_get(&parent_key)) == MONITORED) { | ||
| inode_add(&inode_key); | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Can we move some of this logic to is_monitored? We can change it to return an enum with values like MONITORED, NOT_MONITORED and PARENT_MONITORED, then we can decide what to do based on that returned value (i.e, add the inode if the event is creation and the parent is monitored).
There was a problem hiding this comment.
Maybe. I tried this locally. It seemed to involve a lot of changes.
There was a problem hiding this comment.
My initial attempt didn't work. I might try again.
| const char filename[PATH_MAX], | ||
| inode_key_t* inode) { | ||
| inode_key_t* inode, | ||
| inode_key_t* parent_inode) { |
There was a problem hiding this comment.
The parent inode was added to the event so that it could be used to find the parent path. It is added for all event types, not just when a file is opened. Perhaps NULL could be passed instead of the actual parent inode for the other type of events for now.
There was a problem hiding this comment.
Yeah, passing NULL should be enough for now
…et the inode and then add to the map
tests/test_editors/test_nvim.py
Outdated
| fut = '/mounted/test.txt' | ||
| fut_host = f'{ignored_dir}/test.txt' | ||
| fut_backup = f'{fut}~' | ||
| fut_backup_host = f'{ignored_dir}/test.txt~' |
There was a problem hiding this comment.
fut_host and fut_backup_host should probably be blank.
tests/test_editors/test_sed.py
Outdated
| def test_sed(vi_container, server, ignored_dir): | ||
| # File Under Test | ||
| fut = '/mounted/test.txt' | ||
| fut_host = f'{ignored_dir}/test.txt' |
There was a problem hiding this comment.
Same as above.
Molter73
left a comment
There was a problem hiding this comment.
There seems to be a lot of style changes, is this due to you using a newer Rust version? Can you check what version you are using? Is it 1.92+? If this is due to a version difference and yours is newer, can you open a new PR with those style changes? that way we can focus this PR on the actual inode tracking.
Actually, on a second read-through, it might also be related to the bump to version 2024, if so, please move the bump to a separate PR and we can stack this one on top of that one, sorry for the inconvenience! |
Molter73
left a comment
There was a problem hiding this comment.
I still need to go through the integration test changes, but I see them failing in the PR, do you need help debugging the failures?
| self.update_entry(path.as_path()).with_context(|| { | ||
| format!("Failed to update entry for {}", path.display()) | ||
| })?; | ||
| } else if path.is_dir() { |
There was a problem hiding this comment.
This is slightly related to a conversation I've been having with @Stringy. IMO, in the case of finding a directory that matches a glob pattern, we should recursively scan into it, the reason being that tracking the directory itself isn't of much value if the pattern doesn't hit any files inside the directory.
For now we can leave it like this, but I think we need to take a close look at what users might expect when configuring different types of glob patterns (i.e do they want that strict set of patterns to be monitored, or are they expressing a set of paths that work as the base to be monitored?)
| /// We use the parent inode provided by the eBPF code | ||
| /// to look up the parent directory's host path, then construct the full | ||
| /// path by appending the new file's name. | ||
| fn handle_creation_event(&self, event: &Event) -> anyhow::Result<()> { |
There was a problem hiding this comment.
I'm hoping this is still WIP and the debug statements are for testing, we really don't need a debug statement on each return condition, particularly not in the hot path where monitoring sets of files mildly noisy will just flood the logs and make it hard to see what is actually going on.
Please cut down on the debug statements, if something is expected to happen, don't put a statement there.
There was a problem hiding this comment.
I tried removing all the debug statements in this method and managed to boil it down from 50+ lines to just this:
let inode = event.get_inode();
let parent_inode = event.get_parent_inode();
if self.get_host_path(Some(inode)).is_some() || parent_inode.empty() {
return Ok(());
}
if let Some(filename) = event.get_filename().file_name()
&& let Some(parent_host_path) = self.get_host_path(Some(parent_inode))
{
let host_path = parent_host_path.join(filename);
self.update_entry_with_inode(inode, host_path)
.with_context(|| {
format!(
"Failed to add creation event entry for {}",
filename.display()
)
})?;
}
Ok(())Given this is a lot more compact and easy to ready and the additional verbosity in the logs the implementation in the PR doesn't bring much of a benefit IMO, I would propose we try to move the implementation closer to this.
8fc3c95 to
4f1686a
Compare
Description
Host file paths need to be reported correctly. When a file is created in a tracked directory its inode should be added to a hash set in kernel space. In user space an entry needs to be added into a map with the inode as the key and file path as the value.
Checklist
Automated testing
If any of these don't apply, please comment below.
Testing Performed
The following command was run in the main branch and this branch.
The following commands were also run
One of the tests was run with bye_world.txt instead.
In the main branch
{ "timestamp": 1773981129521946307, "hostname": "jvirtane-thinkpadp1gen3.rmtuswa.csb", "process": { "comm": "vim", "args": [ "/usr/bin/vim", "bye_world.txt" ], "exe_path": "/usr/bin/vim", "container_id": null, "uid": 0, "username": "root", "gid": 0, "login_uid": 4203274, "pid": 1252416, "in_root_mount_ns": true, "lineage": [ { "uid": 4203274, "exe_path": "/usr/bin/sudo" }, { "uid": 4203274, "exe_path": "/usr/bin/sudo" } ] }, "file": { "Chmod": { "inner": { "filename": "/etc/sensitive-files/bye_world.txt", "host_file": "", "inode": { "inode": 0, "dev": 0 } }, "new_mode": 33188, "old_mode": 33188 } } }Note that the
host_fileis blank.In this branch
{ "timestamp": 1773980922622536024, "hostname": "jvirtane-thinkpadp1gen3.rmtuswa.csb", "process": { "comm": "vim", "args": [ "/usr/bin/vim", "hello_world.txt" ], "exe_path": "/usr/bin/vim", "container_id": null, "uid": 0, "username": "root", "gid": 0, "login_uid": 4203274, "pid": 1251608, "in_root_mount_ns": true, "lineage": [ { "uid": 4203274, "exe_path": "/usr/bin/sudo" }, { "uid": 4203274, "exe_path": "/usr/bin/sudo" } ] }, "file": { "Chmod": { "inner": { "filename": "/etc/sensitive-files/hello_world.txt", "host_file": "/etc/sensitive-files/hello_world.txt", "inode": { "inode": 74960529, "dev": 37 }, "parent_inode": { "inode": 0, "dev": 0 } }, "new_mode": 33188, "old_mode": 33188 } } }Note that
host_fileis now populated.