diff --git a/.bumpversion.cfg b/.bumpversion.cfg
deleted file mode 100644
index cb8dd72..0000000
--- a/.bumpversion.cfg
+++ /dev/null
@@ -1,6 +0,0 @@
-[bumpversion]
-current_version = 0.2.9
-commit = True
-tag = True
-
-[bumpversion:file:setup.py]
\ No newline at end of file
diff --git a/.github/workflows/build_test_linux.yml b/.github/workflows/build_test_linux.yml
index 0dccbbc..90c449a 100644
--- a/.github/workflows/build_test_linux.yml
+++ b/.github/workflows/build_test_linux.yml
@@ -1,65 +1,70 @@
-name: build_test_linux
+name: Linux Build
+
on:
push:
- branches:
- - main
+ branches: [ main ]
pull_request:
- branches:
- - main
+ branches: [ main ]
+
jobs:
- build_test_linux:
- name: ${{ matrix.python-version }} on ${{ matrix.os }}
+ build:
runs-on: ${{ matrix.os }}
strategy:
- fail-fast: false
matrix:
- os: ["ubuntu-latest"]
- python-version: ["pypy-3.9",
- "pypy-3.8",
- # "pypy-3.7",
- "3.11",
- "3.10",
- "3.9",
- "3.8",
- # "3.7"
- ]
+ os: [ubuntu-latest]
+
+ env:
+ CMAKE_BUILD_TYPE: Release
steps:
- - uses: actions/checkout@v3
- with:
- submodules: recursive
- - name: Setup py-${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- # Caching improves build time, we use pythonLocation to cache everything including wheels to avoid building
- # wheels at each build (pandas/Pypy is extremely time consuming)
- # sed replacement is performed to rectify PyPy path which ends with /bin
- # cache key takes into account the Python version of the runner to avoid version mismatch on updates.
- - name: Get pip cache path
- id: get-pip-path
- run: |
- id=$(echo ${{ env.pythonLocation }} | sed 's/\/bin//g')
- echo "::set-output name=id::$id"
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up CMake
+ uses: lukka/get-cmake@v3.29.2
+
+      - name: Cache APT packages
+        uses: actions/cache@v4
+        with:
+          path: |
+            /var/cache/apt/archives
+          key: ${{ runner.os }}-apt-${{ hashFiles('**/CMakeLists.txt') }}
+          # Fallback keys are matched by prefix. Repeating the full key here
+          # could never hit anything the primary key lookup had not already tried.
+          restore-keys: |
+            ${{ runner.os }}-apt-
- - name: Pip cache
- uses: actions/cache@v3
- id: pip-cache
- with:
- path: ${{ steps.get-pip-path.outputs.id }}
- key: ${{ steps.get-pip-path.outputs.id }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }}
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y \
+ build-essential \
+ libssl-dev \
+ libopencv-dev \
+ libpcap-dev \
+ libpthread-stubs0-dev \
+ git
- - name: Install requirements
- if: steps.pip-cache.outputs.cache-hit != 'true'
- run: |
- python -m pip install --upgrade pip
- python -m pip install -r requirements.txt
+      - name: Install precompiled PcapPlusPlus
+        run: |
+          # Single source of truth for the release, so URL, archive name and
+          # extracted directory cannot drift apart when the version is bumped.
+          PCPP_VERSION=25.05
+          PCPP_PKG="pcapplusplus-${PCPP_VERSION}-ubuntu-22.04-gcc-11.4.0-x86_64"
+          # -f makes curl fail on an HTTP error instead of saving the error page as the tarball.
+          curl -fLO "https://github.com/seladb/PcapPlusPlus/releases/download/v${PCPP_VERSION}/${PCPP_PKG}.tar.gz"
+          tar -xzf "${PCPP_PKG}.tar.gz"
+          cd "${PCPP_PKG}"
+          sudo cp -r include/* /usr/local/include/
+          sudo cp -r lib/* /usr/local/lib/
+          # Refresh the dynamic linker cache so the newly copied libraries are found.
+          sudo ldconfig
- - name: Build
- run: |
- python -m pip install .
+ - name: Configure and Build
+ run: |
+ cd heiFIP/
- - name: Test
- if: startsWith(matrix.os, 'ubuntu') && !startsWith(matrix.python-version, '3.10')
- run: |
- python -m pytest tests.py
+ mkdir -p build && cd build
+
+ cmake .. \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DUSE_MANUAL_PCAPPLUSPLUS=ON \
+ -DPcapPlusPlus_INCLUDE_DIRS="/usr/local/include/pcapplusplus/" \
+ -DPcapPlusPlus_LIBRARIES="/usr/local/lib/libCommon++.a;/usr/local/lib/libPacket++.a;/usr/local/lib/libPcap++.a" \
+ -DUSE_MANUAL_OPENSSL=ON \
+ -DOPENSSL_INCLUDE_DIR="/usr/include/openssl" \
+ -DOPENSSL_CRYPTO_LIBRARY="/usr/lib/x86_64-linux-gnu/libcrypto.a"
+
+ make -j$(nproc)
diff --git a/.github/workflows/build_test_macos.yml b/.github/workflows/build_test_macos.yml
index c155e55..d8e1d1a 100644
--- a/.github/workflows/build_test_macos.yml
+++ b/.github/workflows/build_test_macos.yml
@@ -1,67 +1,51 @@
-name: build_test_macos
+name: MacOS Build
+
on:
push:
- branches:
- - main
+ branches: [ main ]
pull_request:
- branches:
- - main
+ branches: [ main ]
+
jobs:
- build_test_macos:
- name: ${{ matrix.python-version }} on ${{ matrix.os }}
+ build:
runs-on: ${{ matrix.os }}
strategy:
- fail-fast: false
matrix:
- os: ["macos-12"]
- python-version: ["pypy-3.9",
- "pypy-3.8",
- # "pypy-3.7",
- # "3.11",
- # Not available yet.
- "3.10",
- "3.9",
- "3.8",
- # "3.7"
- ]
+ os: [macos-latest]
+
+ env:
+ CMAKE_BUILD_TYPE: Release
steps:
- - uses: actions/checkout@v3
- with:
- submodules: recursive
- - name: Setup py-${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- # Caching improves build time, we use pythonLocation to cache everything including wheels to avoid building
- # wheels at each build (pandas/Pypy is extremely time consuming)
- # sed replacement is performed to rectify PyPy path which ends with /bin
- # cache key takes into account the Python version of the runner to avoid version mismatch on updates.
- - name: Get pip cache path
- id: get-pip-path
- run: |
- id=$(echo ${{ env.pythonLocation }} | sed 's/\/bin//g')
- echo "::set-output name=id::$id"
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up CMake
+ uses: lukka/get-cmake@v3.29.2
+
+      - name: Set up Homebrew cache
+        uses: actions/cache@v4
+        with:
+          path: |
+            /Users/runner/Library/Caches/Homebrew
+            /usr/local/Homebrew/Library/Taps
+            /usr/local/Cellar
+          key: ${{ runner.os }}-brew-${{ hashFiles('**/CMakeLists.txt') }}
+          # Fallback keys are matched by prefix. Repeating the full key here
+          # could never hit anything the primary key lookup had not already tried.
+          restore-keys: |
+            ${{ runner.os }}-brew-
- - name: Pip cache
- uses: actions/cache@v3
- id: pip-cache
- with:
- path: ${{ steps.get-pip-path.outputs.id }}
- key: ${{ steps.get-pip-path.outputs.id }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }}
+ - name: Install dependencies
+ run: |
+ brew update
+ brew install openssl opencv libpcap cmake git pcapplusplus
- - name: Install requirements
- if: steps.pip-cache.outputs.cache-hit != 'true'
- run: |
- python -m pip install --upgrade pip
- python -m pip install -r requirements.txt
+ - name: Configure and Build
+ run: |
+ cd heiFIP/
- - name: Build
- run: |
- python -m pip install .
+ mkdir -p build && cd build
- - name: Test
- # On other versions then 3.9, we test only. (without coverage generation)
- if: startsWith(matrix.os, 'macos') && !startsWith(matrix.python-version, '3.9') && !startsWith(github.ref, 'refs/tags/')
- run: |
- python -m pytest tests.py
+ cmake .. \
+ -DCMAKE_BUILD_TYPE=Release
+
+ make
diff --git a/.github/workflows/build_test_windows.yml b/.github/workflows/build_test_windows.yml
deleted file mode 100644
index 30b2c91..0000000
--- a/.github/workflows/build_test_windows.yml
+++ /dev/null
@@ -1,58 +0,0 @@
-name: build_test_windows
-on:
- push:
- branches:
- - main
- pull_request:
- branches:
- - main
-jobs:
- build_test_windows:
- name: ${{ matrix.python-version }} on ${{ matrix.os }}
- runs-on: ${{ matrix.os }}
- strategy:
- fail-fast: false
- matrix:
- os: ["windows-latest"]
- python-version: ["pypy-3.9",
- "pypy-3.8",
- # "pypy-3.7", does work atm.
- "3.11",
- "3.9",
- # "3.8",
- # "3.7"
- ]
-
- steps:
- - uses: actions/checkout@v3
- with:
- submodules: recursive
- - name: Setup py-${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
-
- - name: Setup msys2
- uses: msys2/setup-msys2@v2
- with:
- msystem: MINGW64
- update: true
- install: git unzip mingw-w64-x86_64-libjpeg-turbo mingw-w64-x86_64-zlib mingw-w64-x86_64-libtiff mingw-w64-x86_64-freetype mingw-w64-x86_64-lcms2 mingw-w64-x86_64-libwebp mingw-w64-x86_64-openjpeg2 mingw-w64-x86_64-libimagequant mingw-w64-x86_64-libraqm mingw-w64-x86_64-gcc mingw-w64-x86_64-python3 mingw-w64-x86_64-python3-pip mingw-w64-x86_64-python3-setuptools
-
- - name: Install requirements
- run: |
- python -m pip install --upgrade pip
- python -m pip install -r requirements.txt
-
- - name: Build
- env:
- MSYSTEM: MINGW64
- MSYS2_PATH: D:/a/_temp/msys64
- run: |
- python -m pip install .
-
- - name: Test
- # On other versions then 3.9, we test only. (without coverage generation)
- if: startsWith(matrix.os, 'windows') && !startsWith(matrix.python-version, '3.9') && !startsWith(github.ref, 'refs/tags/')
- run: |
- python -m pytest tests.py
diff --git a/.github/workflows/build_wheel_publish.yml b/.github/workflows/build_wheel_publish.yml
deleted file mode 100644
index 73dbb04..0000000
--- a/.github/workflows/build_wheel_publish.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: build_wheel_publish
-
-on:
- release:
- types: [created]
-
-jobs:
- build_wheels:
- name: Build wheels
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- - name: Set up Python
- uses: actions/setup-python@v3
- with:
- python-version: '3.x'
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install build
- - name: Build package
- run: python -m build
- - name: Publish package
- uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
- with:
- user: __token__
- password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/README.md b/README.md
index 4d4ce4a..672bf79 100644
--- a/README.md
+++ b/README.md
@@ -10,32 +10,10 @@ Currently, we only support **offline** network data analysis.
However, we plan to adapt our library to support **online** network data too to enable live-probing of models.
-
- | Live Notebook |
-
-
-
-
- |
-
| Latest Release |
-
-
-
- |
-
-
-
- | Supported Versions |
-
-
-
-
-
-
-
+ Version 1.0
|
@@ -63,9 +41,6 @@ However, we plan to adapt our library to support **online** network data too to
-
-
-
@@ -99,114 +74,135 @@ The idea to create heiFIP came from working with Deep Learning approaches to cla
- **Max packets per flow** allows you to specify the maximum number of packets per flow. If the total number of packets is too great, the remaining images are discarded.
- **Packet Image** converts a single packet into an image.
- **Markov Transition Matrix Image**: converts a packet or a flow into a Markov representation.
-- **Header** processing allows you to customize header fields of different protocols. It aims to remove biasing fields. For more details look into [header.py](https://github.com/stefanDeveloper/heiFIP/blob/main/heifip/plugins/header.py)
+- **Header** processing allows you to customize header fields of different protocols. It aims to remove biasing fields.
- **Remove Payload** options allows you to only work on header data.
-- **Fast and flexible**: We rely on [Scapy](https://github.com/secdev/scapy) for our sniffing and header processing. Image preparation is based on raw bytes.
+- **Fast and flexible**: The main image processing operates on raw bytes inside the image classes, while header preprocessing is done with PcapPlusPlus.
- **Machine learning orientation**: heiFIP aims to make Deep Learning approaches using network data as images reproducible and deployable. Using heiFIP as a common framework enables researches to test and verify their models.
## Examples
| Image Type | Description | Example |
|------------|-------------|---------|
-| Packet | Converts a single packet into a square image. Size depends on the total length |  |
-| Flow | Converts a flow packet into a square image |  |
-| Markov Transition Matrix Packet | Converts a packet into a Markov Transition Matrix. Size is fixed to 16x16. |  |
-| Markov Transition Matrix Flow | Converts a flow into a Markov Transition Matrix. It squares the image based on the number of packets |  |
+| Packet | Converts a single packet into a square image. Size depends on the total length |  |
+| Flow | Converts a flow packet into a square image |  |
+| Markov Transition Matrix Packet | Converts a packet into a Markov Transition Matrix. Size is fixed to 16x16. |  |
+| Markov Transition Matrix Flow | Converts a flow into a Markov Transition Matrix. It squares the image based on the number of packets |  |
-## Getting Started
+## Requirements
-Install our package using PyPi
+* **C++ Compiler**: GCC ≥ 9.0, Clang ≥ 10, or MSVC 2019 with C++17 support.
+* **CMake**: Version ≥ 3.14
+* **PcapPlusPlus**: Installed system‑wide or built locally. ([https://github.com/seladb/PcapPlusPlus](https://github.com/seladb/PcapPlusPlus))
+* **OpenSSL**: For SHA256 hashing (libcrypto).
+* **OpenCV**: Version ≥ 4.0 for image handling and saving (e.g., cv::imwrite).
+* **pthread**: POSIX threads (Linux/macOS). Windows users require linking against `-lws2_32` and `-lIPHLPAPI`.
+* **libpcap**: PCAP Support (Linux/macOS)
-```sh
-pip install heifip
-```
-Now, you can use the integrate CLI:
+Optional:
-```sh
-> fip
-Usage: fip [OPTIONS] COMMAND [ARGS]...
+* **getopt\_long**: For CLI parsing (provided by libc on Linux/macOS). Windows may need `getopt` replacement.
-Options:
- --version Show the version and exit.
- -h, --help Show this message and exit.
+## Building from source
-Commands:
- extract
-```
-To extract images from PCAPs, we currently split the command into flow and packet:
-
-```sh
-> fip extract
-Starting FlowImageProcessor CLI
-Usage: fip extract [OPTIONS] COMMAND [ARGS]...
-
-Options:
- -h, --help Show this message and exit.
-
-Commands:
- flow
- packet
-
-# Show help information
-> fip extract [flow/packet]-h
-Starting FlowImageProcessor CLI
-Usage: fip extract flow [OPTIONS]
-
-Options:
- -w, --write PATH Destination file path, stores result [required]
- -r, --read PATH [required]
- -t, --threads INTEGER Number of parallel threads that can be used
- [default: 4]
- --preprocess [NONE|HEADER] Applies a preprocessing to the input data: none:
- No preprocessing payload: Only payload data is
- used header: Preprocesses headers
- (DNS,HTTP,IP,IPv6,TCP,UDP supported) to remove
- some biasing data [default: NONE]
- --min_im_dim INTEGER Minimum dim ouput images need to have, 0=No
- minimum dim [default: 0]
- --max_im_dim INTEGER Maximum dim ouput images can have, 0=No maximum
- dim [default: 0]
- --remove_duplicates Within a single output folder belonging to a
- single input folder no duplicate images will be
- produced if two inputs lead to the same image
- --min_packets INTEGER Minimum packets that a FlowImage needs to have,
- 0=No minimum packets per flow [default: 0]
- --max_packets INTEGER Minimum packets that a FlowImage needs to have,
- 0=No minimum packets per flow [default: 0]
- --append
- --tiled
- --width INTEGER [default: 128]
- -h, --help Show this message and exit.
-
-> fip extract flow -r /PATH/PCAPs -w /PATH/IMAGES
-```
+```bash
+# Clone this repo
+git clone https://github.com/yourusername/heiFIPCpp.git
+cd heiFIPCpp/heiFIP/
-Import FIPExtractor to run it inside your program:
+# Create build directory
+mkdir build && cd build
-```python
-extractor = FIPExtractor()
-img = extractor.create_image('./test/pcaps/dns/dns-binds.pcap')
-extractor.save_image(img, './test/pcaps/dns/dns-binds.pcap')
-```
+cmake ..
-### Building from source
+# We highly recommend specifying the locations of the dependencies manually, especially since
+# PcapPlusPlus is often not installed in a standard location. We do ship scripts that try to detect
+# the dependencies automatically; if those scripts fail, you can pass the include directories of the header
+# files as well as the paths to the libraries manually, as shown below. Do not forget to list all three
+# PcapPlusPlus libraries: libCommon++, libPacket++ and libPcap++. Doing this manually for OpenCV is possible but,
+# due to the number of libraries to link, very tedious; since OpenCV ships its own CMake configuration it is also
+# unnecessary. On macOS, make sure that the linked libraries are not Intel (x86_64) bottles: the code would still
+# be compiled for ARM64 but dynamically linked against x86_64 .dylib files. This forces macOS to translate
+# at runtime using Rosetta 2, which incurs significant overhead. If possible, use a Linux distribution.
-Simply run:
+cmake .. \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DUSE_MANUAL_PCAPPLUSPLUS=ON \
+ -DPcapPlusPlus_INCLUDE_DIRS="/opt/homebrew/Cellar/pcapplusplus/25.05/include" \
+ -DPcapPlusPlus_LIBRARIES="/opt/homebrew/Cellar/pcapplusplus/25.05/lib/libCommon++.a\;/opt/homebrew/Cellar/pcapplusplus/25.05/lib/libPacket++.a\;/opt/homebrew/Cellar/pcapplusplus/25.05/lib/libPcap++.a" \
+ -DUSE_MANUAL_OPENSSL=ON \
+ -DOPENSSL_INCLUDE_DIR="/opt/homebrew/opt/openssl@3/include" \
+ -DOPENSSL_CRYPTO_LIBRARY="/opt/homebrew/opt/openssl@3/lib/libcrypto.a"
-```
-pip install .
+# Compile
+make -j$(nproc)
+
+# or, equivalently (still inside the build/ directory)
+cmake --build .
+
+# The executable 'heiFIP' will be produced in build/
```
-### Publications that use heiFIP
-- [A Generalizable Approach for Network Flow Image Representation for Deep Learning] - CSNet 23
-- [Explainable artificial intelligence for improving a session-based malware traffic classification with deep learning] - SSCI 23
+## Getting Started
+
+After building, the command line interface can be used to extract images from pcap files with the following command:
+```bash
+./heiFIP \
+ --name HelloHeiFIP \
+ --input /path/to/capture.pcap \
+ --output /path/to/outdir \
+ --threads 4 \
+ --processor HEADER \
+ --mode FlowImageTiledAuto \
+ --dim 16 \
+ --append \
+ --fill 0 \
+ --min-dim 10 \
+ --max-dim 2000 \
+ --min-pkts 10 \
+ --max-pkts 100 \
+ --remove-dup
+```
+### Options
+| Flag | Description |
+| ------------------- | -------------------------------------------------------------- |
+| `-i`, `--input` | Input PCAP file path |
+| `-o`, `--output` | Output directory |
+| `-t`, `--threads` | Number of worker threads (default: 1) |
+| `-p`, `--processor` | Preprocessing: `NONE` or `HEADER` |
+| `-m`, `--mode` | Image type: `PacketImage`, `FlowImage`, `FlowImageTiledFixed`, |
+| | `FlowImageTiledAuto`, `MarkovTransitionMatrixFlow`, |
+| | `MarkovTransitionMatrixPacket` |
+| `--dim` | Base dimension for image (e.g. width/height in pixels) |
+| `--fill` | Fill or padding value (0–255) |
+| `--cols` | Number of columns (for tiled/fixed or Markov flow) |
+| `--auto-dim` | Enable auto‑dimension selection (bool) |
+| `--append` | Enable append mode for flow images (bool) |
+| `--min-dim` | Minimum allowed image dimension |
+| `--max-dim` | Maximum allowed image dimension |
+| `--min-pkts` | Minimum packets per flow (for tiled/flow modes) |
+| `--max-pkts` | Maximum packets per flow |
+| `--remove-dup` | Remove duplicate flows/packets by hash |
+| `--name` | Filename of the processed image |
+| `-h`, `--help` | Show this help message |
+
+## Extending
+
+To add a new image type:
+
+1. Define a new `ImageArgs` struct in `extractor.cpp`.
+2. Extend the `ImageType` enum.
+3. Implement the conversion in `PacketProcessor::createImageFromPacket()`.
+4. Update the CLI `--mode` parser to include your new type.
+
+---
-## Credits
+### Publications that use heiFIP
-[NFStream](https://github.com/nfstream/nfstream) for the inspiration of the `README.md` and workflow testing.
+- S. Machmeier, M. Hoecker, V. Heuveline, "Explainable Artificial Intelligence for Improving a Session-Based Malware Traffic Classification with Deep Learning", in 2023 IEEE Symposium Series on Computational Intelligence (SSCI), Mexico-City, Mexico, 2023. https://doi.org/10.1109/SSCI52147.2023.10371980
+- S. Machmeier, M. Trageser, M. Buchwald, and V. Heuveline, "A generalizable approach for network flow image representation for deep learning", in 2023 7th Cyber Security in Networking Conference (CSNet), Montréal, Canada, 2023. https://doi.org/10.1109/CSNet59123.2023.10339761
### Authors
@@ -214,6 +210,7 @@ The following people contributed to heiFIP:
- [Stefan Machmeier](https://github.com/stefanDeveloper): Creator
- [Manuel Trageser](https://github.com/maxi99manuel99): Header extraction and customization.
+- [Henri Rebitzky](https://github.com/HenriRebitzky): Conversion from Python to C++
## License
diff --git a/examples/.DS_Store b/examples/.DS_Store
new file mode 100644
index 0000000..5008ddf
Binary files /dev/null and b/examples/.DS_Store differ
diff --git a/examples/flow-tiled.png b/examples/flow-tiled.png
index 8eacaf5..c5a40bb 100644
Binary files a/examples/flow-tiled.png and b/examples/flow-tiled.png differ
diff --git a/examples/markov-flow.png b/examples/markov-flow.png
index f6d4667..f943d24 100644
Binary files a/examples/markov-flow.png and b/examples/markov-flow.png differ
diff --git a/examples/markov-packet.png b/examples/markov-packet.png
index 63f6c67..56d807c 100644
Binary files a/examples/markov-packet.png and b/examples/markov-packet.png differ
diff --git a/examples/packet.png b/examples/packet.png
index d08f7b5..7475139 100644
Binary files a/examples/packet.png and b/examples/packet.png differ
diff --git a/heiFIP/CMakeLists.txt b/heiFIP/CMakeLists.txt
new file mode 100644
index 0000000..5829da7
--- /dev/null
+++ b/heiFIP/CMakeLists.txt
@@ -0,0 +1,97 @@
+cmake_minimum_required(VERSION 3.14)
+project(heiFIP)
+
+# Require C++20 outright. Without CMAKE_CXX_STANDARD_REQUIRED, CMake lets the
+# standard decay to an older one when the compiler lacks C++20 support.
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# === 1. Default to Release mode ===
+if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
+endif()
+
+# === 2. Maximum optimization flags for Release ===
+# NOTE: -march=native ties the produced binaries to the CPU of the build machine.
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -funroll-loops -flto=auto -DNDEBUG")
+set(CMAKE_C_FLAGS_RELEASE "-O3 -march=native -funroll-loops -flto=auto -DNDEBUG")
+
+# === 3. Manual override support ===
+# Each dependency can either be discovered via find_package() or be supplied
+# explicitly through -D<VAR>=... when the matching USE_MANUAL_* option is ON.
+
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+
+# --- OpenSSL ---
+option(USE_MANUAL_OPENSSL "Use manual OpenSSL paths" OFF)
+if(USE_MANUAL_OPENSSL)
+    if(NOT DEFINED OPENSSL_INCLUDE_DIR OR NOT DEFINED OPENSSL_CRYPTO_LIBRARY)
+        message(FATAL_ERROR "Manual OpenSSL mode requires OPENSSL_INCLUDE_DIR and OPENSSL_CRYPTO_LIBRARY")
+    endif()
+    set(OpenSSL_INCLUDE_DIRS "${OPENSSL_INCLUDE_DIR}")
+    set(OpenSSL_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}")
+    # libssl is optional; only libcrypto is mandatory in manual mode.
+    if(DEFINED OPENSSL_SSL_LIBRARY)
+        list(APPEND OpenSSL_LIBRARIES "${OPENSSL_SSL_LIBRARY}")
+    endif()
+else()
+    find_package(OpenSSL REQUIRED)
+    set(OpenSSL_INCLUDE_DIRS "${OPENSSL_INCLUDE_DIR}")
+    set(OpenSSL_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}")
+endif()
+
+# --- OpenCV ---
+option(USE_MANUAL_OPENCV "Use manual OpenCV paths" OFF)
+if(USE_MANUAL_OPENCV)
+    if(NOT DEFINED OpenCV_INCLUDE_DIR OR NOT DEFINED OpenCV_LIBRARIES)
+        message(FATAL_ERROR "Manual OpenCV mode requires OpenCV_INCLUDE_DIR and OpenCV_LIBRARIES")
+    endif()
+    set(OpenCV_INCLUDE_DIRS "${OpenCV_INCLUDE_DIR}")
+    set(OpenCV_LIBS "${OpenCV_LIBRARIES}")
+else()
+    find_package(OpenCV REQUIRED)
+    # OpenCV_INCLUDE_DIRS is used as provided by find_package().
+    set(OpenCV_LIBS "${OpenCV_LIBRARIES}")
+endif()
+
+# --- PcapPlusPlus ---
+option(USE_MANUAL_PCAPPLUSPLUS "Use manual PcapPlusPlus paths" OFF)
+if(USE_MANUAL_PCAPPLUSPLUS)
+    if(NOT DEFINED PcapPlusPlus_INCLUDE_DIRS OR NOT DEFINED PcapPlusPlus_LIBRARIES)
+        message(FATAL_ERROR "Manual PcapPlusPlus mode requires PcapPlusPlus_INCLUDE_DIRS and PcapPlusPlus_LIBRARIES")
+    endif()
+    # Append required external dependencies manually
+    list(APPEND PcapPlusPlus_LIBRARIES pcap pthread)
+else()
+    find_package(PcapPlusPlus REQUIRED)
+endif()
+
+# === 4. User project headers ===
+# CMAKE_CURRENT_SOURCE_DIR (rather than CMAKE_SOURCE_DIR) keeps these paths
+# correct when this project is pulled in through add_subdirectory().
+include_directories(
+    ${CMAKE_CURRENT_SOURCE_DIR}/assets
+    ${CMAKE_CURRENT_SOURCE_DIR}/plugins
+    ${CMAKE_CURRENT_SOURCE_DIR}/images
+    ${CMAKE_CURRENT_SOURCE_DIR}/layers
+)
+
+# === 5. Executables ===
+add_executable(heiFIP cli.cpp)
+add_executable(main main.cpp)
+# main2.cpp removed
+
+# === 6. Apply includes and libraries ===
+# PRIVATE: executables have no consumers that could inherit these settings.
+foreach(_target IN ITEMS heiFIP main)
+    target_include_directories(${_target} PRIVATE
+        ${PcapPlusPlus_INCLUDE_DIRS}
+        ${OpenSSL_INCLUDE_DIRS}
+        ${OpenCV_INCLUDE_DIRS}
+    )
+    target_link_libraries(${_target} PRIVATE
+        ${OpenSSL_LIBRARIES}
+        ${PcapPlusPlus_LIBRARIES}
+        ${OpenCV_LIBS}
+    )
+endforeach()
+
+# === 7. Optimization and LTO in Release builds ===
+# The generator expressions are quoted and use ';' between flags: an unquoted
+# expression containing spaces is split into several arguments and breaks.
+foreach(_target IN ITEMS heiFIP main)
+    target_compile_options(${_target} PRIVATE
+        "$<$<CONFIG:Release>:-O3;-march=native;-funroll-loops;-flto=auto;-DNDEBUG>"
+    )
+    target_link_options(${_target} PRIVATE
+        "$<$<CONFIG:Release>:-flto=auto>"
+    )
+endforeach()
\ No newline at end of file
diff --git a/heiFIP/assets/PcapHeaders.h b/heiFIP/assets/PcapHeaders.h
new file mode 100644
index 0000000..9ca148c
--- /dev/null
+++ b/heiFIP/assets/PcapHeaders.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include <cstdint>
+
+/**
+ * @struct PcapGlobalHeader
+ * @brief Represents the 24-byte global header at the start of a PCAP file.
+ *
+ * Fields (in file byte order, typically little-endian):
+ * magic_number : 4 bytes
+ * • Identifies the file as a PCAP. Common value 0xa1b2c3d4 (nanosecond‐resolution variants differ).
+ * version_major : 2 bytes
+ * • Major version of the libpcap file format (e.g., 2).
+ * version_minor : 2 bytes
+ * • Minor version of the libpcap file format (e.g., 4).
+ * thiszone : 4 bytes (signed)
+ * • Offset from UTC in seconds (usually 0). Historically used for timestamp adjustment.
+ * sigfigs : 4 bytes
+ * • Timestamp accuracy; typically set to 0 (no accuracy information).
+ * snaplen : 4 bytes
+ * • “Snapshot length” or maximum number of bytes captured per packet. Packets larger than this are truncated.
+ * network : 4 bytes
+ * • Data link type (DLT) identifier, e.g., 1 for Ethernet. Determines how to interpret raw packet headers.
+ */
+struct PcapGlobalHeader {
+    uint32_t magic_number; // File format identifier: 0xa1b2c3d4 (or swapped/endian variants)
+    uint16_t version_major; // Major version number (e.g., 2)
+    uint16_t version_minor; // Minor version number (e.g., 4)
+    int32_t thiszone; // GMT to local time correction (in seconds; usually 0)
+    uint32_t sigfigs; // Accuracy of timestamps (in microseconds; typically 0)
+    uint32_t snaplen; // Max length of captured packets, in bytes
+    uint32_t network; // Data link type (e.g., 1 = Ethernet)
+};
+
+// Guard the documented 24-byte layout: any compiler-inserted padding would make
+// the struct unusable as a direct image of the on-disk header.
+static_assert(sizeof(PcapGlobalHeader) == 24, "PcapGlobalHeader must be exactly 24 bytes");
+
+/**
+ * @struct PcapPacketHeader
+ * @brief Represents the 16-byte per-packet header for each packet in a PCAP file.
+ *
+ * Fields (in file byte order, typically little-endian):
+ * ts_sec : 4 bytes
+ * • Timestamp, seconds portion, when the packet was captured.
+ * ts_usec : 4 bytes
+ * • Timestamp, microseconds portion (0–999999) for finer granularity.
+ * caplen : 4 bytes
+ * • Number of bytes of packet data actually saved in the file (may be ≤ original length).
+ * len : 4 bytes
+ * • Original length of the packet on the wire (before any truncation).
+ */
+struct PcapPacketHeader {
+    uint32_t ts_sec; // Timestamp: seconds since Epoch (Unix time)
+    uint32_t ts_usec; // Timestamp: microseconds past ts_sec
+    uint32_t caplen; // Captured length (number of bytes written to file)
+    uint32_t len; // Original packet length (on-the-wire size)
+};
+
+// Guard the documented 16-byte layout: any compiler-inserted padding would make
+// the struct unusable as a direct image of the on-disk record header.
+static_assert(sizeof(PcapPacketHeader) == 16, "PcapPacketHeader must be exactly 16 bytes");
\ No newline at end of file
diff --git a/heiFIP/assets/heiFIPPacketImage.cpp b/heiFIP/assets/heiFIPPacketImage.cpp
new file mode 100644
index 0000000..d0059fa
--- /dev/null
+++ b/heiFIP/assets/heiFIPPacketImage.cpp
@@ -0,0 +1,306 @@
+#pragma once
+
+#include
+#include
+#include "PcapHeaders.h" // Provides PcapPacketHeader for captured length
+#include
+#include
+#include
+#include
+#include
+
+/**
+ * @class heiFIPPacketImage
+ * @brief Base class for converting raw packet byte data into a 2D matrix representation.
+ *
+ * Responsibilities:
+ *   - Store raw packet bytes (std::vector<uint8_t>) and captured length (_cap_length).
+ *   - Offer multiple constructors for different initialization styles:
+ *       • Direct data + cap_length.
+ *       • Data only (cap_length is the number of bytes supplied).
+ *       • Data + image-dimension parameters (dim, fill, auto_dim), which immediately
+ *         build a square “tiled” matrix and store both the matrix and a “binaries” copy.
+ *   - Provide utilities:
+ *       • printHexData(): Print packet bytes in hex for debugging.
+ *       • getHexData(): Retrieve raw bytes as a vector<uint8_t>.
+ *       • bit_array(): Split every byte into two 4-bit nibble values (high nibble first).
+ *       • get_matrix_tiled(): Build a dim×dim grayscale matrix from raw bytes, with padding/truncation.
+ *   - Provide getters/setters for data, cap_length, and the computed matrix.
+ */
+class heiFIPPacketImage {
+public:
+    /**
+     * @brief Constructor: initialize with raw byte data and explicit captured length.
+     *
+     * @param data        Vector of raw packet bytes (0–255).
+     * @param cap_length  The “caplen” field from the pcap header indicating how many bytes were captured.
+     *
+     * Why:
+     *   - Some callers know the cap_length in advance; this constructor lets them set both fields directly.
+     */
+    heiFIPPacketImage(std::vector<uint8_t> data, uint32_t cap_length)
+        : _data(std::move(data)), _cap_length(cap_length)
+    {}
+
+    /**
+     * @brief Constructor: initialize with raw byte data only; cap_length is derived from the data.
+     *
+     * @param data  Vector of raw packet bytes.
+     *
+     * Workflow:
+     *   1. Store input bytes in _data.
+     *   2. Set _cap_length to _data.size(): the bytes held by this object are the bytes that
+     *      were captured, so their count is the captured length.
+     *   3. No PcapPacketHeader is consulted (a default-constructed one has indeterminate fields).
+     *
+     * Why:
+     *   - Callers that only have the byte array get a well-defined cap_length
+     *     without having to pass it separately.
+     */
+    heiFIPPacketImage(std::vector<uint8_t> data)
+        : _data(std::move(data))
+    {
+        // Was read from an uninitialized PcapPacketHeader before, i.e. an indeterminate value.
+        _cap_length = static_cast<uint32_t>(_data.size());
+    }
+
+    /**
+     * @brief Constructor: initialize with raw byte data and immediately build a tiled image matrix.
+     *
+     * @param data      Vector of raw packet bytes.
+     * @param dim       Target dimension of the square output image (width = height = dim).
+     *                  If auto_dim is true, the actual dimension is computed as ceil(sqrt(length)).
+     * @param fill      Fill value (0–255) used to pad if the flattened data is smaller than dim×dim.
+     * @param auto_dim  If true, ignore provided dim and compute dim = ceil(sqrt(length of data)).
+     *
+     * Workflow:
+     *   1. Store input bytes in _data.
+     *   2. Set _cap_length to _data.size() (same as the data-only constructor).
+     *   3. Call get_matrix_tiled(fill, dim, auto_dim), which returns:
+     *        • result.first  = 2D matrix (dim×dim) of uint8_t values (padded/truncated).
+     *        • result.second = “binaries” vector-of-vectors, here just a single row of raw data.
+     *   4. Store result.first in matrix member and result.second in binaries member.
+     *
+     * Why:
+     *   - Some callers want to immediately get a matrix representation upon construction,
+     *     so this constructor does that in one step, storing both the matrix and raw-binary copy.
+     */
+    heiFIPPacketImage(std::vector<uint8_t> data, int dim, int fill, bool auto_dim)
+        : _data(std::move(data))
+    {
+        // Captured length is the number of bytes held; no pcap header is available here.
+        _cap_length = static_cast<uint32_t>(_data.size());
+
+        // Build the tiled matrix and binaries representation in one call.
+        auto result = get_matrix_tiled(fill, dim, auto_dim);
+        matrix = std::move(result.first);
+        binaries = std::move(result.second);
+    }
+
+    ~heiFIPPacketImage() = default;
+
+    /**
+     * @brief Print the raw packet bytes in hexadecimal to stdout for debugging.
+     *
+     * Output format:
+     *   “Packet has size (Size: <cap_length> bytes):” followed by each byte as “HH ” (two-digit hex).
+     *   std::cout is switched back to decimal / space fill afterwards so later output is unaffected.
+     */
+    void printHexData() const {
+        std::cout << std::dec
+                  << "Packet has size"
+                  << " (Size: " << get_cap_length() << " bytes):\n";
+        for (size_t i = 0; i < _data.size(); ++i) {
+            std::cout << std::hex
+                      << std::setw(2) << std::setfill('0')
+                      << static_cast<int>(_data[i]) << " ";
+        }
+        std::cout << std::dec << std::setfill(' ') << std::endl;  // undo the sticky hex/fill state
+    }
+
+    /**
+     * @brief Return a copy of the raw packet bytes as a vector<uint8_t>.
+     *
+     * @return std::vector<uint8_t>  Each element is one byte from _data.
+     *
+     * Why:
+     *   - Some image classes need a direct copy of the packet bytes.
+     *   - Ensures callers cannot modify the original _data member.
+     */
+    std::vector<uint8_t> getHexData() const {
+        std::vector<uint8_t> hexData;
+        hexData.reserve(_data.size());
+        for (size_t i = 0; i < _data.size(); ++i) {
+            hexData.push_back(_data[i]);
+        }
+        return hexData;
+    }
+
+    /**
+     * @brief Convert raw bytes to a 4-bit–granularity “bit array.”
+     *
+     * Workflow:
+     *   1. Copy each byte from _data into a local vector called data.
+     *   2. For each byte, produce an 8-character string of ‘0’/‘1’ bits (std::bitset<8>).
+     *   3. Concatenate all these bit strings into one long string “bytes_as_bits.”
+     *   4. Walk through bytes_as_bits in 4-bit chunks; each chunk is interpreted as a binary number
+     *      in range 0–15, then appended to transition vector.
+     *   5. Return transition, a vector<uint8_t> of size 2 * _data.size() (high nibble, then low nibble).
+     *
+     * Why:
+     *   - Some image formats (e.g., certain Markov or n-gram matrices) operate on 4-bit “nibble” values.
+     *   - Converting each byte into two 4-bit values allows constructing those images.
+     */
+    std::vector<uint8_t> bit_array() const {
+        // 1) Copy bytes so as not to modify _data
+        std::vector<uint8_t> data;
+        data.reserve(_data.size());
+        for (uint8_t byte : _data) {
+            data.push_back(byte);
+        }
+
+        // 2) Build a concatenated string of bits, 8 bits per byte
+        std::string bytes_as_bits;
+        bytes_as_bits.reserve(data.size() * 8);
+        for (unsigned char byte : data) {
+            bytes_as_bits += std::bitset<8>(byte).to_string();
+        }
+
+        // 3) Group into 4-bit chunks and convert to byte values 0–15
+        std::vector<uint8_t> transition;
+        transition.reserve((bytes_as_bits.size() + 3) / 4);
+        for (size_t i = 0; i < bytes_as_bits.length(); i += 4) {
+            // The bit string length is always a multiple of 8, so every chunk has exactly 4 bits.
+            transition.push_back(
+                static_cast<uint8_t>(
+                    std::stoi(bytes_as_bits.substr(i, 4), nullptr, 2)
+                )
+            );
+        }
+        return transition;
+    }
+
+    /**
+     * @brief Build a square “tiled” matrix (dim × dim) from raw bytes, with padding or truncation.
+     *
+     * @param fill      Value (0–255) to pad matrix cells if flattened data is shorter than dim².
+     * @param dim       Desired dimension of the square output matrix (width = height = dim).
+     * @param auto_dim  If true, compute dim = ceil( sqrt(max(binaries[i].size())) ) before flattening.
+     *
+     * Workflow:
+     *   1. Create a single-element vector-of-vectors called binaries, containing one row: hexData = getHexData().
+     *   2. Determine length = max row length in binaries (here, just hexData.size()).
+     *   3. If auto_dim is true, recompute dim = ceil(sqrt(length)).
+     *   4. Compute total = dim × dim.
+     *   5. Flatten binaries into one 1D vector “flat” (binaries only has one row here, but code is generic).
+     *   6. If flat.size() < total, append (total − flat.size()) copies of fill.
+     *   7. Else if flat.size() > total, truncate flat to size = total.
+     *   8. Allocate result as vector<vector<uint8_t>>(dim, vector<uint8_t>(dim)).
+     *   9. Fill result[i][j] sequentially from flat[k], where i = k / dim, j = k % dim.
+     *  10. Return a pair: { result, binaries }.
+     *
+     * Returns:
+     *   - first: dim×dim matrix of uint8_t
+     *   - second: original “binaries” row(s) used (here, just hexData).
+     *
+     * Why:
+     *   - Many image types represent packet bytes as a square grayscale image, padding/truncating as needed.
+     *   - The “binaries” return value allows higher layers to also inspect the raw vector(s) of bytes.
+     */
+    std::pair<std::vector<std::vector<uint8_t>>, std::vector<std::vector<uint8_t>>>
+    get_matrix_tiled(int fill, int dim, bool auto_dim) {
+        // 1) Build “binaries” as a vector of one row (hexData)
+        std::vector<std::vector<uint8_t>> binaries;
+        std::vector<uint8_t> hexData = getHexData();
+        binaries.push_back(hexData);
+
+        // 2) Determine maximum row length in binaries (only one row here)
+        size_t length = 0;
+        for (const auto& b : binaries) {
+            if (b.size() > length) {
+                length = b.size();
+            }
+        }
+
+        // 3) If auto_dim is requested, compute dim = ceil(sqrt(length))
+        if (auto_dim) {
+            dim = static_cast<int>(std::ceil(std::sqrt(static_cast<double>(length))));
+        }
+
+        int total = dim * dim;
+
+        // 4) Flatten binaries into a single 1D array “flat”
+        std::vector<uint8_t> flat;
+        flat.reserve(total);
+        for (const auto& row : binaries) {
+            flat.insert(flat.end(), row.begin(), row.end());
+        }
+
+        // 5) Pad with “fill” if too short
+        if (flat.size() < static_cast<size_t>(total)) {
+            flat.insert(flat.end(), total - flat.size(), static_cast<uint8_t>(fill));
+        }
+        // 6) Truncate if too long
+        else if (flat.size() > static_cast<size_t>(total)) {
+            flat.resize(total);
+        }
+
+        // 7) Reshape into a 2D dim×dim matrix
+        std::vector<std::vector<uint8_t>> result(dim, std::vector<uint8_t>(dim));
+        for (size_t idx = 0; idx < static_cast<size_t>(total); ++idx) {
+            size_t i = idx / dim;
+            size_t j = idx % dim;
+            result[i][j] = flat[idx];
+        }
+
+        return { std::move(result), std::move(binaries) };  // locals are done; avoid two deep copies
+    }
+
+    /**
+     * @brief Get a copy of the raw packet data vector.
+     * @return std::vector<uint8_t>  Copy of _data.
+     */
+    std::vector<uint8_t> get_data() const {
+        return _data;
+    }
+
+    /**
+     * @brief Replace the raw packet data (the stored cap_length is left unchanged).
+     * @param data  New vector of raw bytes.
+     */
+    void set_data(std::vector<uint8_t> data) {
+        _data = std::move(data);
+    }
+
+    /**
+     * @brief Get the captured length (caplen) of this packet.
+     * @return uint32_t  The stored captured length.
+     */
+    uint32_t get_cap_length() const {
+        return _cap_length;
+    }
+
+    /**
+     * @brief Set the captured length (caplen).
+     * @param cap_length  New captured length value.
+     */
+    void set_cap_length(uint32_t cap_length) {
+        _cap_length = cap_length;
+    }
+
+    /**
+     * @brief Return a reference to the stored 2D matrix.
+     * @return const std::vector<std::vector<uint8_t>>&  The dim×dim matrix built by the tiled constructor.
+     *
+     * Note: Only the (data, dim, fill, auto_dim) constructor fills it; otherwise the matrix is empty.
+     */
+    const std::vector<std::vector<uint8_t>>& get_matrix() const {
+        return matrix;
+    }
+
+private:
+    std::vector<uint8_t> _data;                  ///< Raw bytes of the packet
+    uint32_t _cap_length = 0;                    ///< Captured length (explicit, or the byte count of _data)
+    std::vector<std::vector<uint8_t>> binaries;  ///< Original binaries as rows (usually one row of raw bytes)
+    std::vector<std::vector<uint8_t>> matrix;    ///< Tiled dim×dim matrix representation of packet bytes
+};
\ No newline at end of file
diff --git a/heiFIP/cli.cpp b/heiFIP/cli.cpp
new file mode 100644
index 0000000..a975eff
--- /dev/null
+++ b/heiFIP/cli.cpp
@@ -0,0 +1,157 @@
+#include
+#include
+#include
+#include
+
+#include "extractor.cpp"
+#include "runner.cpp"
+
+/// @brief Prints usage/help information for the CLI tool to stdout; @p progName is shown as the executable name.
+void print_usage(const char* progName) {
+    std::cout << "Usage: " << progName << " [options]\n"
+              << " -i, --input FILE input pcap file path\n"
+              << " -o, --output DIR output directory\n"
+              << " -t, --threads N number of threads (default 1)\n"
+              << " -p, --processor TYPE preprocessing type: NONE or HEADER\n"
+              << " -m, --mode MODE image type: FlowImage, FlowImageTiledFixed, FlowImageTiledAuto,\n"
+              << " MarkovFlow, MarkovPacket, PacketImage\n"  // the spellings the -m parser in main() accepts
+              << " --dim N image dimension\n"
+              << " --fill N fill value for missing data\n"
+              << " --cols N number of columns (used in some modes)\n"
+              << " --auto-dim enable auto-dimension (FlowImageTiledAuto, etc.)\n"
+              << " --append append mode for FlowImage\n"
+              << " --min-dim N minimum image dimension\n"
+              << " --max-dim N maximum image dimension\n"
+              << " --min-pkts N minimum packets per flow\n"
+              << " --max-pkts N maximum packets per flow\n"
+              << " --remove-dup remove duplicate packets/flows\n"
+              << " --name name of processed image\n"
+              << " -h, --help display this help and exit\n";
+}
+
+int main(int argc, char* argv[]) {
+    // CLI parameter variables
+    std::string input_file;
+    std::string output_dir;
+    int thread_count = 1;
+    PacketProcessorType proc_type = PacketProcessorType::NONE;
+    ImageType img_type = ImageType::PacketImage;
+
+    // Optional parameters with defaults
+    std::string image_name = "heiFIPGeneratedImage";
+    size_t dim = 0, fill = 0, cols = 0;
+    bool auto_dim = false, append = false;
+    size_t min_dim = 0, max_dim = 0, min_pkts = 0, max_pkts = 0;
+    bool remove_dup = false;
+
+    // Parse a non-negative integer option value. Plain std::stoi would terminate the program on
+    // non-numeric text (uncaught exception) and let "-1" wrap around to a huge size_t.
+    auto parse_count = [](const char* text, const std::string& option) -> size_t {
+        try {
+            size_t used = 0;
+            const long long value = std::stoll(text, &used);
+            if (value >= 0 && text[used] == '\0') return static_cast<size_t>(value);
+        } catch (const std::exception&) {}  // reported uniformly below
+        throw std::runtime_error("Invalid value for --" + option + ": '" + text + "'");
+    };
+
+    // Long options for getopt
+    static struct option long_opts[] = {
+        {"name",       required_argument, 0,  0 },
+        {"input",      required_argument, 0, 'i'},
+        {"output",     required_argument, 0, 'o'},
+        {"threads",    required_argument, 0, 't'},
+        {"processor",  required_argument, 0, 'p'},
+        {"mode",       required_argument, 0, 'm'},
+        {"dim",        required_argument, 0,  0 },
+        {"fill",       required_argument, 0,  0 },
+        {"cols",       required_argument, 0,  0 },
+        {"auto-dim",   no_argument,       0,  0 },
+        {"append",     no_argument,       0,  0 },
+        {"min-dim",    required_argument, 0,  0 },
+        {"max-dim",    required_argument, 0,  0 },
+        {"min-pkts",   required_argument, 0,  0 },
+        {"max-pkts",   required_argument, 0,  0 },
+        {"remove-dup", no_argument,       0,  0 },
+        {"help",       no_argument,       0, 'h'},
+        {0, 0, 0, 0}
+    };
+
+    try {
+        // Parse command-line arguments
+        int opt, long_index = 0;
+        while ((opt = getopt_long(argc, argv, "i:o:t:p:m:h", long_opts, &long_index)) != -1) {
+            switch (opt) {
+                case 'i': input_file = optarg; break;
+                case 'o': output_dir = optarg; break;
+                case 't': thread_count = static_cast<int>(parse_count(optarg, "threads")); break;
+                case 'p':
+                    if (std::string(optarg) == "NONE") proc_type = PacketProcessorType::NONE;
+                    else if (std::string(optarg) == "HEADER") proc_type = PacketProcessorType::HEADER;
+                    else { std::cerr << "Unknown processor type\n"; return 1; }
+                    break;
+                case 'm':  // Markov modes accept both the short spelling and the full enumerator name
+                    if (std::string(optarg) == "PacketImage") img_type = ImageType::PacketImage;
+                    else if (std::string(optarg) == "FlowImage") img_type = ImageType::FlowImage;
+                    else if (std::string(optarg) == "FlowImageTiledFixed") img_type = ImageType::FlowImageTiledFixed;
+                    else if (std::string(optarg) == "FlowImageTiledAuto") img_type = ImageType::FlowImageTiledAuto;
+                    else if (std::string(optarg) == "MarkovFlow" || std::string(optarg) == "MarkovTransitionMatrixFlow") img_type = ImageType::MarkovTransitionMatrixFlow;
+                    else if (std::string(optarg) == "MarkovPacket" || std::string(optarg) == "MarkovTransitionMatrixPacket") img_type = ImageType::MarkovTransitionMatrixPacket;
+                    else { std::cerr << "Unknown mode\n"; return 1; }
+                    break;
+                case 0:  // long-only options: dispatch on the matched option's name
+                    if (strcmp(long_opts[long_index].name, "dim") == 0) dim = parse_count(optarg, "dim");
+                    else if (strcmp(long_opts[long_index].name, "fill") == 0) fill = parse_count(optarg, "fill");
+                    else if (strcmp(long_opts[long_index].name, "cols") == 0) cols = parse_count(optarg, "cols");
+                    else if (strcmp(long_opts[long_index].name, "auto-dim") == 0) auto_dim = true;
+                    else if (strcmp(long_opts[long_index].name, "append") == 0) append = true;
+                    else if (strcmp(long_opts[long_index].name, "min-dim") == 0) min_dim = parse_count(optarg, "min-dim");
+                    else if (strcmp(long_opts[long_index].name, "max-dim") == 0) max_dim = parse_count(optarg, "max-dim");
+                    else if (strcmp(long_opts[long_index].name, "min-pkts") == 0) min_pkts = parse_count(optarg, "min-pkts");
+                    else if (strcmp(long_opts[long_index].name, "max-pkts") == 0) max_pkts = parse_count(optarg, "max-pkts");
+                    else if (strcmp(long_opts[long_index].name, "remove-dup") == 0) remove_dup = true;
+                    else if (strcmp(long_opts[long_index].name, "name") == 0) image_name = optarg;
+                    break;
+                case 'h': print_usage(argv[0]); return 0;
+                default: print_usage(argv[0]); return 1;
+            }
+        }
+
+        // Input and output are required
+        if (input_file.empty() || output_dir.empty()) {
+            print_usage(argv[0]);
+            return 1;
+        }
+
+        Runner runner(thread_count);  // Create runner with specified thread count
+        // Select argument type based on image type
+        ImageArgsVariant args;
+        switch (img_type) {
+            case ImageType::FlowImage:
+                args = FlowImageArgs{dim, append, fill};
+                break;
+            case ImageType::FlowImageTiledFixed:
+                args = FlowImageTiledFixedArgs{dim, fill, cols};
+                break;
+            case ImageType::FlowImageTiledAuto:
+                args = FlowImageTiledAutoArgs{dim, fill, auto_dim};
+                break;
+            case ImageType::MarkovTransitionMatrixFlow:
+                args = MarkovTransitionMatrixFlowArgs{cols};
+                break;
+            case ImageType::MarkovTransitionMatrixPacket:
+                args = MarkovTransitionMatrixPacketArgs{};
+                break;
+            case ImageType::PacketImage:
+                args = PacketImageArgs{dim, auto_dim, fill};
+                break;
+        }
+        // Main image generation call using the configured arguments
+        runner.create_image(image_name, input_file, output_dir, args, proc_type, img_type,
+                            min_dim, max_dim, min_pkts, max_pkts, remove_dup);
+    } catch (const std::exception& e) {  // bad option values, missing input file, ...
+        std::cerr << "Error: " << e.what() << "\n";
+        return 1;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/heiFIP/cmake/FindOpenSSL.cmake b/heiFIP/cmake/FindOpenSSL.cmake
new file mode 100644
index 0000000..6455dc7
--- /dev/null
+++ b/heiFIP/cmake/FindOpenSSL.cmake
@@ -0,0 +1,48 @@
+# Find module for OpenSSL (heiFIP/cmake/FindOpenSSL.cmake). find_path()/find_library() take literal dirs, so glob Homebrew's Cellar first.
+file(GLOB _OPENSSL_CELLAR_INCLUDE_DIRS "/opt/homebrew/Cellar/openssl@3/*/include")
+# 1) Locate the directory that contains openssl/sha.h
+find_path(OPENSSL_INCLUDE_DIR
+  NAMES openssl/sha.h
+  PATHS
+    $ENV{OPENSSL_ROOT_DIR}/include      # if user set OPENSSL_ROOT_DIR
+    ${_OPENSSL_CELLAR_INCLUDE_DIRS}     # every installed Homebrew Cellar version
+    /opt/homebrew/include               # Homebrew “flat” symlink
+    /usr/local/include
+    /usr/include
+)
+file(GLOB _OPENSSL_CELLAR_LIB_DIRS "/opt/homebrew/Cellar/openssl@3/*/lib")
+# 2) Locate the libraries (same search order as the headers)
+find_library(OPENSSL_CRYPTO_LIBRARY
+  NAMES crypto
+  PATHS
+    $ENV{OPENSSL_ROOT_DIR}/lib
+    ${_OPENSSL_CELLAR_LIB_DIRS}
+    /opt/homebrew/lib
+    /usr/local/lib
+    /usr/lib
+)
+find_library(OPENSSL_SSL_LIBRARY
+  NAMES ssl
+  PATHS
+    $ENV{OPENSSL_ROOT_DIR}/lib
+    ${_OPENSSL_CELLAR_LIB_DIRS}
+    /opt/homebrew/lib
+    /usr/local/lib
+    /usr/lib
+)
+
+# 3) Standard boilerplate: sets OpenSSL_FOUND and reports missing pieces
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(OpenSSL
+  REQUIRED_VARS OPENSSL_INCLUDE_DIR
+                OPENSSL_CRYPTO_LIBRARY
+                OPENSSL_SSL_LIBRARY
+)
+
+if(OpenSSL_FOUND)
+  set(OpenSSL_INCLUDE_DIRS "${OPENSSL_INCLUDE_DIR}")
+  set(OpenSSL_LIBRARIES
+    "${OPENSSL_SSL_LIBRARY}"
+    "${OPENSSL_CRYPTO_LIBRARY}"
+  )
+endif()
\ No newline at end of file
diff --git a/heiFIP/cmake/FindPcapPlusPlus.cmake b/heiFIP/cmake/FindPcapPlusPlus.cmake
new file mode 100644
index 0000000..4552a4e
--- /dev/null
+++ b/heiFIP/cmake/FindPcapPlusPlus.cmake
@@ -0,0 +1,70 @@
+# Find module for PcapPlusPlus (heiFIP/cmake/FindPcapPlusPlus.cmake).
+# Set the PCAPPLUSPLUS_ROOT environment variable to point at a custom install prefix.
+# 1) Locate the headers
+# find_path() looks for SystemUtils.h directly inside each listed directory, so prefixes need PATH_SUFFIXES.
+find_path(PPCPP_INCLUDE_DIR
+  NAMES SystemUtils.h
+  PATHS
+    $ENV{PCAPPLUSPLUS_ROOT}/include/pcapplusplus   # direct Cellar path
+    /opt/homebrew/include/pcapplusplus             # Homebrew symlink
+  HINTS
+    $ENV{PCAPPLUSPLUS_ROOT}
+    /usr/local /usr
+  PATH_SUFFIXES include/pcapplusplus include
+)
+
+# --- if PPCPP_INCLUDE_DIR points at a parent 'include' directory (e.g. preset in the cache), but
+#     the real headers are under include/pcapplusplus, fix it up ---
+if(PPCPP_INCLUDE_DIR)
+  # e.g. PPCPP_INCLUDE_DIR = /opt/homebrew/include
+  if(NOT EXISTS "${PPCPP_INCLUDE_DIR}/SystemUtils.h"
+     AND EXISTS "${PPCPP_INCLUDE_DIR}/pcapplusplus/SystemUtils.h")
+    message(STATUS " >> Adjusting Ppcpp include dir to subfolder ‘pcapplusplus/’")
+    set(PPCPP_INCLUDE_DIR "${PPCPP_INCLUDE_DIR}/pcapplusplus")
+  endif()
+endif()
+
+# 2) Locate the libraries (PATH_SUFFIXES lets the PCAPPLUSPLUS_ROOT prefix resolve to <root>/lib)
+find_library(PPCPP_COMMONPP_LIB
+  NAMES Common++
+  HINTS
+    ENV PCAPPLUSPLUS_ROOT
+    /usr/local/lib /opt/homebrew/lib
+    /usr/lib
+  PATH_SUFFIXES lib lib64
+)
+find_library(PPCPP_PACKETPP_LIB
+  NAMES Packet++
+  HINTS
+    ENV PCAPPLUSPLUS_ROOT
+    /usr/local/lib /opt/homebrew/lib
+    /usr/lib
+  PATH_SUFFIXES lib lib64
+)
+find_library(PPCPP_PCAPPP_LIB
+  NAMES Pcap++
+  HINTS
+    ENV PCAPPLUSPLUS_ROOT
+    /usr/local/lib /opt/homebrew/lib
+    /usr/lib
+  PATH_SUFFIXES lib lib64
+)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(PcapPlusPlus
+  REQUIRED_VARS PPCPP_INCLUDE_DIR
+                PPCPP_COMMONPP_LIB
+                PPCPP_PACKETPP_LIB
+                PPCPP_PCAPPP_LIB
+)
+
+if(PcapPlusPlus_FOUND)
+  set(PcapPlusPlus_INCLUDE_DIRS "${PPCPP_INCLUDE_DIR}")
+  set(PcapPlusPlus_LIBRARIES
+    ${PPCPP_PCAPPP_LIB}      # dependents first (Pcap++ -> Packet++ -> Common++); order matters for static linking
+    ${PPCPP_PACKETPP_LIB}
+    ${PPCPP_COMMONPP_LIB}
+    pcap
+    pthread
+  )
+endif()
\ No newline at end of file
diff --git a/heiFIP/extractor.cpp b/heiFIP/extractor.cpp
new file mode 100644
index 0000000..c493144
--- /dev/null
+++ b/heiFIP/extractor.cpp
@@ -0,0 +1,539 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "init.cpp"
+#include "flow.cpp"
+#include "flow_tiled_auto.cpp"
+#include "flow_tiled_fixed.cpp"
+#include "markov_chain.cpp"
+#include "heiFIPPacketImage.cpp"
+
+
+/**
+ * @struct FlowImageArgs
+ * @brief Options for the FlowImage mode. Plain aggregate: cli.cpp initializes it positionally as {dim, append, fill}, so keep the member order.
+ * @param dim The target dimension (width and height) for the square output image.
+ * @param append Append flag for flow data (CLI: --append); exact effect is implemented in flow.cpp, not shown here.
+ * @param fill Padding value. NOTE(review): stored as size_t although image pixels are uint8_t; confirm how values above 255 are handled.
+ */
+struct FlowImageArgs {
+ size_t dim;
+ bool append;
+ size_t fill;
+};
+
+/**
+ * @struct FlowImageTiledFixedArgs
+ * @brief Options for the FlowImageTiledFixed mode. Plain aggregate: cli.cpp initializes it positionally as {dim, fill, cols}.
+ * @param dim The dimension (width and height) of each tile (sub-image).
+ * @param fill Padding value for tiles with too little data. NOTE(review): size_t here, uint8_t pixels downstream.
+ * @param cols The number of columns of tiles (CLI: --cols); layout logic lives in flow_tiled_fixed.cpp, not shown here.
+ */
+struct FlowImageTiledFixedArgs {
+ size_t dim;
+ size_t fill;
+ size_t cols;
+};
+
+/**
+ * @struct FlowImageTiledAutoArgs
+ * @brief Options for the FlowImageTiledAuto mode. Plain aggregate: cli.cpp initializes it positionally as {dim, fill, auto_dim}.
+ * @param dim The requested dimension (width/height) of each tile.
+ * @param fill Padding value for each tile. NOTE(review): size_t here, uint8_t pixels downstream.
+ * @param auto_dim Auto-dimension flag (CLI: --auto-dim); how the size is derived is implemented in flow_tiled_auto.cpp, not shown here.
+ */
+struct FlowImageTiledAutoArgs {
+ size_t dim;
+ size_t fill;
+ bool auto_dim;
+};
+
+/**
+ * @struct PacketImageArgs
+ * @brief Options for the PacketImage mode (one image per packet). Plain aggregate: cli.cpp initializes it positionally as {dim, auto_dim, fill}.
+ * @param dim The dimension (width/height) of the output packet image.
+ * @param auto_dim Auto-dimension flag (CLI: --auto-dim); presumably sizes the image from the packet length instead of `dim` — confirm in the consumer.
+ * @param fill Padding value used when packet data is shorter than `dim * dim`. NOTE(review): size_t here, uint8_t pixels downstream.
+ */
+struct PacketImageArgs {
+ size_t dim;
+ bool auto_dim;
+ size_t fill;
+};
+
+/**
+ * @struct MarkovTransitionMatrixFlowArgs
+ * @brief Options for the MarkovTransitionMatrixFlow mode. Plain aggregate: cli.cpp initializes it as {cols}.
+ * @param cols Column count taken from the CLI --cols option; its exact meaning is defined in markov_chain.cpp, not shown here.
+ */
+struct MarkovTransitionMatrixFlowArgs {
+ size_t cols;
+};
+
+/**
+ * @struct MarkovTransitionMatrixPacketArgs
+ * @brief Empty tag type: the packet-level Markov mode takes no options, but still needs its own alternative in ImageArgsVariant.
+ */
+struct MarkovTransitionMatrixPacketArgs {};
+
+/**
+ * @typedef ImageArgsVariant
+ * @brief A std::variant that can hold any of the argument structures above; a default-constructed variant holds std::monostate ("not set").
+ *
+ * Usage: use std::get<T>(args) once you know which ImageType you are generating; createMatrix() throws if it still holds std::monostate.
+ */
+using ImageArgsVariant = std::variant<
+ std::monostate,
+ FlowImageArgs,
+ FlowImageTiledFixedArgs,
+ FlowImageTiledAutoArgs,
+ PacketImageArgs,
+ MarkovTransitionMatrixFlowArgs,
+ MarkovTransitionMatrixPacketArgs
+>;
+
+/**
+ * @typedef UInt8Matrix
+ * @brief A list of grayscale images stored as nested vectors.
+ *        Dimensions: [num_images][height][width], where each pixel is a uint8_t (0–255).
+ */
+using UInt8Matrix = std::vector<std::vector<std::vector<uint8_t>>>;
+
+// This concept checks one thing on ImgType:
+// 1) `image.get_matrix()` must be valid on a const object and return something convertible to
+//    const std::vector<std::vector<uint8_t>>&
+template <typename ImgType>
+concept IsFlowImage = requires(const ImgType& image) {
+    // Require `get_matrix() -> const std::vector<std::vector<uint8_t>>&` (or anything convertible to it)
+    { image.get_matrix() } -> std::convertible_to<const std::vector<std::vector<uint8_t>>&>;
+};
+
+/**
+ * @enum ImageType
+ * @brief Enumeration of supported image-generation modes; createMatrix() switches on it and cli.cpp defaults to PacketImage.
+ *
+ * - FlowImage: One image per entire flow, packets arranged sequentially.
+ * - FlowImageTiledFixed: Splits each flow into fixed-size tiles and arranges them in a grid.
+ * - FlowImageTiledAuto: Similar to tiled fixed, but determines tile layout dynamically.
+ * - PacketImage: One image per packet, each packet's raw bytes laid out row-major.
+ * - MarkovTransitionMatrixFlow: Flow-level Markov transition matrix image (cli.cpp mode string: "MarkovFlow").
+ * - MarkovTransitionMatrixPacket: Packet-level Markov transition matrix image (cli.cpp mode string: "MarkovPacket").
+ */
+enum class ImageType {
+ FlowImage,
+ FlowImageTiledFixed,
+ FlowImageTiledAuto,
+ PacketImage,
+ MarkovTransitionMatrixFlow,
+ MarkovTransitionMatrixPacket
+};
+
+/**
+ * @class FIPExtractor
+ * @brief Coordinates reading pcap data, preprocessing, creating various image formats, and saving results.
+ *
+ * Responsibilities:
+ * 1. Read packets from a file or in-memory list via PacketProcessor.
+ * 2. Convert packet/flow data into one of several image types (FlowImage, PacketImage, etc.).
+ * 3. Validate image dimensions and optionally suppress duplicates.
+ * 4. Save the generated grayscale image(s) to disk as PNG.
+ */
+class FIPExtractor {
+public:
+    /**
+     * @brief Verify that an image matrix meets size constraints and (optionally) isn’t a duplicate.
+     *
+     * @tparam ImgType A type providing get_matrix(), returning the image as rows of uint8_t pixels
+     *                 (the shape described by the IsFlowImage concept).
+     *                 Height is the number of rows; width is the length of the first row.
+     *                 An empty matrix is treated as 0×0 instead of indexing a non-existent row 0.
+     *                 Both dimensions are compared against the same min/max limits.
+     * @param image The image object whose get_matrix() result is checked.
+     * @param minImageDim Minimum allowed dimension (height or width). Reject if smaller.
+     * @param maxImageDim Maximum allowed dimension (height or width). Reject if larger; zero → no limit.
+     * @param removeDuplicates If true, reject the image when an identical matrix is already recorded in
+     *                         imagesCreatedSet; otherwise record it there before accepting.
+     * @return true if image passes all checks, false otherwise.
+     */
+
+    template <typename ImgType>
+    bool verify(const ImgType& image,
+                size_t minImageDim,
+                size_t maxImageDim,
+                bool removeDuplicates)
+    {
+        size_t height = image.get_matrix().size();
+        size_t width = height ? image.get_matrix()[0].size() : 0;  // operator[] on an empty vector is undefined
+
+        // Enforce minimum dimension constraint:
+        if (height < minImageDim || width < minImageDim) {
+            std::cout << "[!] Image not created: dimensions smaller than minimum ("
+                      << minImageDim << ").\n";
+            return false;
+        }
+
+        // Enforce maximum dimension constraint (if nonzero):
+        if (maxImageDim != 0 && (height > maxImageDim || width > maxImageDim)) {
+            std::cout << "[!] Image not created: dimensions exceed maximum ("
+                      << maxImageDim << ").\n";
+            return false;
+        }
+
+        if (removeDuplicates) {
+            std::vector<std::vector<uint8_t>> matrix = image.get_matrix();
+            if (imagesCreatedSet.count(matrix)) {
+                std::cout << "[!] Image not created: duplicate detected.\n";
+                return false;
+            }
+            imagesCreatedSet.insert({matrix, true});
+        }
+
+        return true;
+    }
+
+ /**
+ * @brief Default constructor; value-initializes the `processor` member (declared elsewhere in this class, presumably a PacketProcessor).
+ */
+ FIPExtractor()
+ : processor()
+ {}
+
+ /**
+ * @brief Read packets from a pcap file, preprocess, convert to image(s), and return as matrices.
+ *
+ * @param input_file Path to the .pcap file. Must exist on disk.
+ * @param args Variant containing the specific parameters for the chosen ImageType.
+ * @param preprocessing_type NONE or HEADER; forwarded to processor.readPacketsFile() and createMatrix().
+ * @param image_type Which type of image(s) to create (see ImageType enum).
+ * @param min_image_dim Minimum image dimension; forwarded to createMatrix().
+ * @param max_image_dim Maximum image dimension; forwarded to createMatrix().
+ * @param min_packets_per_flow Minimum packet count for a flow; forwarded to createMatrix().
+ * @param max_packets_per_flow Maximum packet count; passed to both readPacketsFile() and createMatrix().
+ * @param remove_duplicates Duplicate-removal flag; passed to both readPacketsFile() and createMatrix().
+ * @return UInt8Matrix The result of createMatrix() for the packets read from the file.
+ * @throws std::runtime_error if input_file doesn’t exist; createMatrix() also throws it when args holds std::monostate.
+ */
+ UInt8Matrix createImageFromFile(
+ const std::string& input_file,
+ const ImageArgsVariant& args,
+ PacketProcessorType preprocessing_type = PacketProcessorType::NONE,
+ ImageType image_type = ImageType::PacketImage,
+ int min_image_dim = 0,
+ int max_image_dim = 0,
+ int min_packets_per_flow = 0,
+ int max_packets_per_flow = 0,
+ bool remove_duplicates = false
+ ) {
+ // Fail early with a clear message instead of letting the reader hit a missing file:
+ if (!std::filesystem::exists(input_file)) {
+ throw std::runtime_error("Input file does not exist: " + input_file);
+ }
+
+ // Read and preprocess packets from the file (behavior of readPacketsFile is defined elsewhere):
+ // - remove_duplicates is presumably applied while reading — confirm in PacketProcessor.
+ // - max_packets_per_flow presumably caps how many packets are read — confirm in PacketProcessor.
+ std::vector> processed_packets =
+ processor.readPacketsFile(
+ input_file,
+ preprocessing_type,
+ remove_duplicates,
+ max_packets_per_flow
+ );
+
+ // NOTE(review): the int limits convert implicitly to createMatrix()'s size_t parameters; negative values would wrap.
+ return createMatrix(
+ processed_packets,
+ preprocessing_type,
+ image_type,
+ min_image_dim,
+ max_image_dim,
+ min_packets_per_flow,
+ max_packets_per_flow,
+ remove_duplicates,
+ args
+ );
+ }
+
+ /**
+ * @brief Convert an in-memory list of raw packets to image(s).
+ *
+ * @param packets Caller-owned vector of packets, taken by non-const reference and handed to processor.readPacketsList().
+ * @param args Variant of parameters for the desired ImageType.
+ * @param preprocessing_type NONE/HEADER; forwarded to readPacketsList() and createMatrix().
+ * @param image_type Which image mode to use.
+ * @param min_image_dim Minimum image dimension threshold; forwarded to createMatrix().
+ * @param max_image_dim Maximum image dimension threshold; forwarded to createMatrix().
+ * @param min_packets_per_flow Minimum packet count to form a flow; forwarded to createMatrix().
+ * @param max_packets_per_flow Maximum packet count; forwarded to createMatrix() only (not to readPacketsList()).
+ * @param remove_duplicates Duplicate-removal flag; passed to both readPacketsList() and createMatrix().
+ * @return UInt8Matrix The result of createMatrix() for the processed packets.
+ */
+ UInt8Matrix createImageFromPacket(
+ std::vector>& packets,
+ const ImageArgsVariant& args,
+ PacketProcessorType preprocessing_type = PacketProcessorType::NONE,
+ ImageType image_type = ImageType::PacketImage,
+ size_t min_image_dim = 0,
+ size_t max_image_dim = 0,
+ size_t min_packets_per_flow = 0,
+ size_t max_packets_per_flow = 0,
+ bool remove_duplicates = false
+ ) {
+ // Preprocess the supplied packets (readPacketsList is defined elsewhere; it presumably wraps them as FIPPacket):
+ std::vector> processed_packets =
+ processor.readPacketsList(packets, preprocessing_type, remove_duplicates);
+
+ // Delegate to createMatrix to produce the actual image(s):
+ return createMatrix(
+ processed_packets,
+ preprocessing_type,
+ image_type,
+ min_image_dim,
+ max_image_dim,
+ min_packets_per_flow,
+ max_packets_per_flow,
+ remove_duplicates,
+ args
+ );
+ }
+
+ /**
+ * @brief Core dispatcher that builds one or more images from FIPPacket data, based on ImageType.
+ *
+ * @param packets Preprocessed packets wrapped in unique_ptr.
+ * @param preprocessing_type Repeats the chosen preprocessing strategy (just for bookkeeping).
+ * @param image_type Determines which case in the switch to execute.
+ * @param min_image_dim Reject images smaller than this dimension.
+ * @param max_image_dim Reject images larger than this dimension; zero → no limit.
+ * @param min_packets_per_flow For flow-based modes: skip flows with fewer than this many packets.
+ * @param max_packets_per_flow For flow-based modes: truncate flows to this many packets.
+ * @param remove_duplicates If true, drop duplicates in `verify()`.
+ * @param args A variant containing exactly one of the argument structs required by the chosen ImageType.
+ * @return UInt8Matrix A list of image matrices; possibly empty if no image passed `verify()`.
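+ * @throws std::runtime_error If `args` is std::monostate or ImageType is invalid. std::get may additionally throw std::bad_variant_access if `args` holds a different alternative than `image_type` requires.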
+ */
+ UInt8Matrix createMatrix(
+ std::vector>& packets,
+ PacketProcessorType preprocessing_type,
+ ImageType image_type,
+ size_t min_image_dim,
+ size_t max_image_dim,
+ size_t min_packets_per_flow,
+ size_t max_packets_per_flow,
+ bool remove_duplicates,
+ const ImageArgsVariant& args
+ ) {
+ // Refuse to continue while the variant is still in its "not initialized" state (see the error text below):
+ if (std::holds_alternative(args)) {
+ throw std::runtime_error("Image arguments not initialized for ImageType.");
+ }
+
+ // Apply the packet cap (0 = no cap). This runs before the switch, so it affects every image type, and it shrinks the caller's vector because `packets` is a non-const reference:
+ if (max_packets_per_flow && packets.size() > max_packets_per_flow) {
+ packets.resize(max_packets_per_flow);
+ }
+
+ // Wrap every packet's raw bytes in a packet-image object up front (one packets_copy entry per packet).
+ // Only the flow-level cases below (FlowImage, FlowImageTiledFixed, FlowImageTiledAuto, MarkovTransitionMatrixFlow) read packets_copy; the per-packet cases rebuild their own byte vectors.
+ std::vector packets_copy;
+ packets_copy.reserve(packets.size());
+ for (const auto& packetPtr : packets) {
+ // Pointer to, and length of, the bytes held by the packet's underlying raw packet
+ const uint8_t* packetData = packetPtr->getRawPacket()->getRawData();
+ size_t packetLen = packetPtr->getRawPacket()->getRawDataLen();
+
+ // Copy the bytes one by one into an owning vector
+ std::vector rawData;
+ rawData.reserve(packetLen);
+ for (size_t i = 0; i < packetLen; ++i) {
+ rawData.push_back(packetData[i]);
+ }
+
+ // Construct the packets_copy element in place from that byte vector
+ packets_copy.emplace_back(rawData);
+ }
+
+ // Dispatch on the image type. Flow-level cases return zero or one image;
+ // per-packet cases return one image for every packet that passes verify().
+ switch (image_type) {
+ case ImageType::FlowImage: {
+ // Minimum-size check; it sees the packet count after the cap above was applied:
+ if (packets.size() < min_packets_per_flow) {
+ return {}; // Too few packets: no image
+ }
+
+ // Read the argument struct for this image type. NOTE(review): std::get on a std::variant throws std::bad_variant_access when another alternative is active - presumably possible if `args` and `image_type` disagree; confirm.
+ auto flowArgs = std::get(args);
+
+ // Build the flow image from packets_copy with the dim, fill and append settings of the argument struct
+ FlowImage image(packets_copy, flowArgs.dim, flowArgs.fill, flowArgs.append);
+
+ // Return the matrix as a one-element list if verify() accepts the image, otherwise an empty list:
+ if (verify(image, min_image_dim, max_image_dim, remove_duplicates)) {
+ return { image.get_matrix() };
+ }
+ return {};
+ }
+
+ case ImageType::FlowImageTiledFixed: {
+ if (packets.size() < min_packets_per_flow) {
+ return {};
+ }
+
+ auto tiledArgs = std::get(args);
+ FlowImageTiledFixed image(packets_copy, tiledArgs.dim, tiledArgs.fill, tiledArgs.cols);
+
+ if (verify(image, min_image_dim, max_image_dim, remove_duplicates)) {
+ return { image.get_matrix() };
+ }
+ return {};
+ }
+
+ case ImageType::FlowImageTiledAuto: {
+ if (packets.size() < min_packets_per_flow) {
+ return {};
+ }
+
+ auto autoArgs = std::get(args);
+ FlowImageTiledAuto image(packets_copy, autoArgs.dim, autoArgs.fill, autoArgs.auto_dim);
+
+ if (verify(image, min_image_dim, max_image_dim, remove_duplicates)) {
+ return { image.get_matrix() };
+ }
+ return {};
+ }
+
+ case ImageType::PacketImage: {
+ // Argument struct for per-packet images (dim, fill, auto_dim are read below). No min_packets_per_flow check in this case.
+ auto packetArgs = std::get(args);
+ UInt8Matrix images; // Collects one matrix per accepted packet
+
+ // The raw bytes are read from `packets` again here; packets_copy is not used in this case:
+ for (const auto& pktPtr : packets) {
+ const uint8_t* data = pktPtr->getRawPacket()->getRawData();
+ size_t len = pktPtr->getRawPacket()->getRawDataLen();
+
+ std::vector rawData;
+ rawData.reserve(len);
+ for (size_t i = 0; i < len; ++i) {
+ rawData.push_back(data[i]);
+ }
+
+ // Build the packet image; the matrix is fetched before verify(), i.e. also for images that end up rejected:
+ heiFIPPacketImage image(rawData, packetArgs.dim, packetArgs.fill, packetArgs.auto_dim);
+ auto matrix = image.get_matrix();
+
+ // Keep the matrix only if verify() accepts the image:
+ if (verify(image, min_image_dim, max_image_dim, remove_duplicates)) {
+ images.push_back(matrix);
+ }
+ }
+
+ return images;
+ }
+
+ case ImageType::MarkovTransitionMatrixFlow: {
+ if (packets.size() < min_packets_per_flow) {
+ return {};
+ }
+
+ auto markovFlowArgs = std::get(args);
+ MarkovTransitionMatrixFlow image(packets_copy, markovFlowArgs.cols);
+
+ if (verify(image, min_image_dim, max_image_dim, remove_duplicates)) {
+ return { image.get_matrix() };
+ }
+ return {};
+ }
+
+ case ImageType::MarkovTransitionMatrixPacket: {
+ // Per-packet Markov images: `args` is not read in this case and there is no min_packets_per_flow check
+ UInt8Matrix images;
+
+ for (const auto& pktPtr : packets) {
+ const uint8_t* data = pktPtr->getRawPacket()->getRawData();
+ size_t len = pktPtr->getRawPacket()->getRawDataLen();
+
+ // Copy the bytes and wrap them in a packet image built from the bytes alone:
+ std::vector rawData;
+ rawData.reserve(len);
+ for (size_t i = 0; i < len; ++i) {
+ rawData.push_back(data[i]);
+ }
+ heiFIPPacketImage packetImage(rawData);
+
+ // Build the transition-matrix image from that packet image; as above, the matrix is fetched before verify():
+ MarkovTransitionMatrixPacket image(packetImage);
+ auto matrix = image.get_matrix();
+
+ if (verify(image, min_image_dim, max_image_dim, remove_duplicates)) {
+ images.push_back(matrix);
+ }
+ }
+
+ return images;
+ }
+
+ default:
+ throw std::runtime_error("Unsupported ImageType passed to createMatrix");
+ }
+
+ // Not reachable: every case above returns and the default branch throws
+ return {};
+ }
+
+ /**
+ * @brief Write the first 2D image in a UInt8Matrix vector to disk as a PNG file.
+ *
+ * @param img A vector of 2D matrices. Only `img[0]` is used (grayscale).
+ * @param output_path The desired file path (without extension). A ".png" is appended.
+ *
+ * Steps:
+ * 1. Check that img is non-empty and contains at least one image.
+ * 2. Interpret img[0] as a grayscale pixel grid: height × width, each pixel 0–255.
+ * 3. Allocate an OpenCV Mat of type CV_8UC1 (single channel, 8-bit).
+ * 4. Copy each pixel from the 2D vector into the Mat’s row‐major buffer.
+ * 5. Ensure parent directory exists by calling std::filesystem::create_directories().
+ * 6. Write the Mat to disk using cv::imwrite(..., path + ".png").
+ */
+ void save_image(const UInt8Matrix& img, const std::string& output_path) {
+     // Nothing to draw: need at least one image with a non-empty first row.
+     if (img.empty() || img[0].empty() || img[0][0].empty()) {
+         std::cerr << "[!] Empty image, cannot save: " << output_path << "\n";
+         return;
+     }
+
+     // Only the first image is written; its first row fixes the width.
+     const auto& grayscale_image = img[0];
+     const int height = static_cast<int>(grayscale_image.size());
+     const int width = static_cast<int>(grayscale_image[0].size());
+
+     // Zero-filled 8-bit single-channel Mat, so cells a short row cannot supply stay 0.
+     cv::Mat mat(height, width, CV_8UC1, cv::Scalar(0));
+     for (int i = 0; i < height; ++i) {
+         const auto& src_row = grayscale_image[i];
+         uint8_t* row_ptr = mat.ptr<uint8_t>(i);
+         // Bound by the row's own length as well: rows are not guaranteed to be equally long.
+         for (int j = 0; j < width && static_cast<size_t>(j) < src_row.size(); ++j) {
+             row_ptr[j] = src_row[j];
+         }
+     }
+
+     // create_directories() rejects an empty path, which is what parent_path() yields for a bare file name.
+     const std::filesystem::path outp(output_path + ".png");
+     if (!outp.parent_path().empty()) std::filesystem::create_directories(outp.parent_path());
+
+     // imwrite() reports failure through its return value; surface it instead of dropping it.
+     if (!cv::imwrite(outp.string(), mat)) std::cerr << "[!] Failed to write image: " << outp.string() << "\n";
+ }
+
+private:
+ PacketProcessor processor; ///< Responsible for reading pcap data, handling preprocessing, and converting RawPacket → FIPPacket
+ std::map>, bool> imagesCreatedSet;
+};
\ No newline at end of file
diff --git a/heiFIP/images/NetworkTrafficImage.hpp b/heiFIP/images/NetworkTrafficImage.hpp
new file mode 100644
index 0000000..a5bb3a5
--- /dev/null
+++ b/heiFIP/images/NetworkTrafficImage.hpp
@@ -0,0 +1,40 @@
+#pragma once
+
+/**
+ * @class NetworkTrafficImage
+ * @brief Base class for all traffic‐based image generators.
+ *
+ * Responsibilities:
+ * - Store common image parameters: a fill value and a base dimension.
+ * - Provide a common interface (via inheritance) for more specialized traffic image classes
+ * (e.g., FlowImage, MarkovTransitionMatrixFlow) to share these parameters.
+ *
+ * Members:
+ * _fill : Byte value (0–255) used to pad empty pixels when constructing images.
+ * _dim : Base dimension (e.g., tile size) used by derived classes as a starting value.
+ *
+ * Why:
+ * - Derived classes may need a default padding value and dimension for their image‐construction logic.
+ * - By centralizing these fields here, all traffic‐image types can uniformly receive and store them.
+ */
+class NetworkTrafficImage {
+protected:
+    int _fill;  ///< Padding value handed to the constructor; readable by derived image classes
+    int _dim;   ///< Base dimension handed to the constructor; readable by derived image classes
+
+public:
+    /**
+     * @brief Store the fill value and base dimension shared by all traffic images.
+     *
+     * @param fill Padding value (default = 0); stored as given, no range check is applied.
+     * @param dim  Base dimension (default = 8); stored as given.
+     *
+     * Both fields are `protected`: the purpose of this base class is to hand these two
+     * values to the derived image classes, and private fields without accessors would be
+     * write-only state that no subclass could read. Nothing outside the class hierarchy
+     * gains access through this.
+     */
+    NetworkTrafficImage(int fill = 0, int dim = 8)
+        : _fill(fill), _dim(dim)
+    {}
+};
\ No newline at end of file
diff --git a/heiFIP/images/flow.cpp b/heiFIP/images/flow.cpp
new file mode 100644
index 0000000..05b0e18
--- /dev/null
+++ b/heiFIP/images/flow.cpp
@@ -0,0 +1,160 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "NetworkTrafficImage.hpp"
+#include "heiFIPPacketImage.cpp"
+
+/**
+ * @class FlowImage
+ * @brief Converts a sequence of heiFIPPacketImage instances (one per packet of a single flow) into a 2D matrix.
+ *
+ * Inherits from NetworkTrafficImage, which provides shared logic for traffic-based images.
+ * Responsibilities:
+ * - Accept a vector of packet images representing one flow.
+ * - Either “append” all packet byte vectors into a single long vector and reshape,
+ * or lay out each packet’s bytes on its own row, padding to a uniform length.
+ * - Provide getters for both the tiled matrix and the raw binaries.
+ */
+class FlowImage : public NetworkTrafficImage {
+public:
+    /**
+     * @brief Build a flow image from the packet images of one flow.
+     *
+     * @param packets Packet images of the flow, in order; each contributes its getHexData() bytes.
+     * @param dim     Row width used when append=true (a non-positive width yields an empty matrix).
+     *                Ignored when append=false, where rows are as long as the longest packet.
+     * @param fill    Padding value for short rows when append=false. The tail padding of the
+     *                append=true layout is always 0, not `fill`.
+     * @param append  true: concatenate all packet bytes and cut them into rows of `dim` bytes;
+     *                false: one row per packet.
+     *
+     * Workflow:
+     *   1. Forward fill and dim to the NetworkTrafficImage base.
+     *   2. Move `packets` into the member, so the by-value argument is not copied a second time.
+     *   3. Run getMatrix() on the stored packets and keep both halves of its result:
+     *      `matrix` (the 2D flow image) and `binaries` (the per-packet byte vectors).
+     */
+    FlowImage(std::vector<heiFIPPacketImage> packets, int dim = 16, int fill = 0, bool append = false)
+        : NetworkTrafficImage(fill, dim), packets(std::move(packets)), append(append)
+    {
+        // The parameter `packets` is moved-from by now; the member holds the data.
+        auto result = getMatrix(dim, append, fill, this->packets);
+        matrix = std::move(result.first);
+        binaries = std::move(result.second);
+    }
+
+    /**
+     * @brief Per-packet byte vectors the image was built from.
+     * @return Mutable reference to the stored vectors, one entry per input packet.
+     */
+    std::vector<std::vector<uint8_t>>& get_binaries() {
+        return binaries;
+    }
+
+    /**
+     * @brief The 2D flow image.
+     * @return Read-only reference to the matrix, indexed [row][column].
+     *
+     * append=true : ceil(totalBytes / dim) rows of `dim` columns.
+     * append=false: one row per packet, each as wide as the longest packet.
+     */
+    const std::vector<std::vector<uint8_t>>& get_matrix() const {
+        return matrix;
+    }
+
+private:
+    std::vector<heiFIPPacketImage> packets;        ///< Input packet images for this flow
+    bool append;                                   ///< Concatenate-then-reshape (true) or row-per-packet (false)
+    std::vector<std::vector<uint8_t>> matrix;      ///< Resulting 2D image matrix
+    std::vector<std::vector<uint8_t>> binaries;    ///< Byte vectors obtained from each packet
+
+    /**
+     * @brief Collect the packets' bytes and lay them out as a 2D matrix.
+     *
+     * @param dim     Row width for the append=true layout.
+     * @param append  Selects the layout (see below).
+     * @param fill    Padding value for the append=false layout; converted to uint8_t.
+     * @param packets Packet images to read; only getHexData() is called on them.
+     * @return pair:
+     *   - first:  the image matrix,
+     *   - second: the getHexData() result of every packet, in input order.
+     *   Both are moved into the pair, so the byte data is not copied again on return.
+     *
+     * append=true:
+     *   1. Concatenate all byte vectors into one buffer.
+     *   2. rows = ceil(buffer.size() / dim); zero-pad the buffer to rows * dim bytes.
+     *   3. Cut the buffer into `rows` consecutive rows of `dim` bytes.
+     *   A non-positive `dim` cannot be laid out (the row count would divide by zero),
+     *   so in that case the matrix is left empty; the byte vectors are still returned.
+     *
+     * append=false:
+     *   1. Find the longest byte vector.
+     *   2. Emit one row per packet: its bytes followed by `fill` up to that length.
+     *
+     * An empty `packets` vector yields an empty matrix in either layout; packets that are
+     * all empty yield one zero-length row per packet when append=false.
+     */
+    std::pair<std::vector<std::vector<uint8_t>>, std::vector<std::vector<uint8_t>>>
+    getMatrix(int dim, bool append, int fill, const std::vector<heiFIPPacketImage>& packets) {
+        std::vector<std::vector<uint8_t>> binaries;
+        binaries.reserve(packets.size());
+
+        // 1) Extract the bytes of each packet image
+        for (const auto& packet : packets) {
+            binaries.push_back(packet.getHexData());
+        }
+
+        std::vector<std::vector<uint8_t>> reshaped;
+        // Layout A: all packet bytes concatenated, then cut into rows of `dim` bytes
+        if (append) {
+            // A non-positive width cannot be laid out and would divide by zero below;
+            // leave the matrix empty instead of computing a row count from it.
+            if (dim <= 0) {
+                return { std::move(reshaped), std::move(binaries) };
+            }
+
+            // a) Concatenate every packet's bytes into one buffer
+            std::vector<uint8_t> fh;
+            for (const auto& binary : binaries) {
+                fh.insert(fh.end(), binary.begin(), binary.end());
+            }
+
+            // b) Row count = ceil(size / width), computed in size_t rather than int
+            const size_t width = static_cast<size_t>(dim);
+            const size_t rn = fh.size() / width + (fh.size() % width > 0 ? 1 : 0);
+            fh.resize(rn * width, static_cast<uint8_t>(0));  // Zero-pad the tail to a whole row
+
+            // c) Build each row directly from its slice of the buffer
+            reshaped.reserve(rn);
+            for (size_t i = 0; i < rn; ++i) {
+                reshaped.emplace_back(fh.begin() + i * width, fh.begin() + (i + 1) * width);
+            }
+        }
+        // Layout B: one row per packet, padded to the longest packet
+        else {
+            // a) Determine maximum packet length
+            size_t maxLength = 0;
+            for (const auto& binary : binaries) {
+                maxLength = std::max(maxLength, binary.size());
+            }
+
+            // b) Copy each packet's bytes and pad the row to maxLength with `fill`
+            reshaped.reserve(binaries.size());
+            for (const auto& binary : binaries) {
+                reshaped.push_back(binary);
+                reshaped.back().resize(maxLength, static_cast<uint8_t>(fill));
+            }
+        }
+
+        return { std::move(reshaped), std::move(binaries) };
+    }
+};
\ No newline at end of file
diff --git a/heiFIP/images/flow_tiled_auto.cpp b/heiFIP/images/flow_tiled_auto.cpp
new file mode 100644
index 0000000..cf9e79f
--- /dev/null
+++ b/heiFIP/images/flow_tiled_auto.cpp
@@ -0,0 +1,256 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "heiFIPPacketImage.cpp"
+#include "NetworkTrafficImage.hpp"
+
+/**
+ * @class FlowImageTiledAuto
+ * @brief Builds a square, tiled image from a sequence of packet images, automatically determining tile dimensions.
+ *
+ * Inherits from NetworkTrafficImage, which stores a default fill value and base dimension.
+ * Responsibilities:
+ * - Convert each packet’s raw bytes into its own dim×dim tile, padding/truncating as needed.
+ * - Arrange all those tiles into a larger square grid (dim_total×dim_total), where dim_total = ceil(sqrt(numTiles)).
+ * - Provide getters for the final tiled matrix and the original per-packet binaries.
+ */
+class FlowImageTiledAuto : public NetworkTrafficImage {
+public:
+    /**
+     * @brief Build a square grid of per-packet tiles whose side is derived from the packet count.
+     *
+     * @param packets  Packet images of one flow, in order; each contributes its getHexData() bytes.
+     * @param dim      Side length of every packet tile when auto_dim=false.
+     * @param fill     Value (converted to uint8_t) for tile cells a packet is too short to reach.
+     * @param auto_dim If true, `dim` is replaced by ceil(sqrt(longest packet length)), so the
+     *                 longest packet fits into its tile without truncation.
+     *
+     * Workflow:
+     *   1. Forward fill and dim to the NetworkTrafficImage base.
+     *   2. Copy `packets` and `auto_dim` into the members.
+     *   3. Run get_matrix_tiled(), which
+     *        a. collects the bytes of every packet,
+     *        b. resolves the tile side (see auto_dim),
+     *        c. writes each packet row-major into its own tile; bytes beyond dim*dim are dropped,
+     *        d. computes dim_total = ceil(sqrt(number of packets)),
+     *        e. lays the tiles out row by row in a dim_total x dim_total grid; unused grid
+     *           slots are filled with all-zero tiles (0, not `fill`).
+     *   4. Keep the returned matrix and byte vectors in the members.
+     *
+     * An empty `packets` vector produces an empty matrix.
+     */
+    FlowImageTiledAuto(const std::vector<heiFIPPacketImage>& packets, int dim = 16, int fill = 0, bool auto_dim = false)
+        : NetworkTrafficImage(fill, dim), packets(packets), auto_dim(auto_dim)
+    {
+        auto result = get_matrix_tiled(fill, dim, auto_dim, packets);
+        matrix = std::move(result.first);
+        binaries = std::move(result.second);
+    }
+
+    /**
+     * @brief The tiled image.
+     * @return Read-only reference to the matrix; dim_total*dim rows of dim_total*dim bytes.
+     */
+    const std::vector<std::vector<uint8_t>>& get_matrix() const {
+        return matrix;
+    }
+
+    /**
+     * @brief Per-packet byte vectors the tiles were built from.
+     * @return Mutable reference to the stored vectors, one entry per input packet.
+     */
+    std::vector<std::vector<uint8_t>>& get_binaries() {
+        return binaries;
+    }
+
+private:
+    std::vector<heiFIPPacketImage> packets;        ///< Input packet images
+    bool auto_dim;                                 ///< Derive the tile side from the longest packet
+    std::vector<std::vector<uint8_t>> matrix;      ///< Final tiled flow image
+    std::vector<std::vector<uint8_t>> binaries;    ///< Byte vectors obtained from each packet
+
+    /**
+     * @brief Turn every packet into a tile and assemble the tiles into one square matrix.
+     *
+     * @param fill     Initial value of every tile cell; converted to uint8_t.
+     * @param dim      Tile side, unless replaced because auto_dim is set.
+     * @param auto_dim If true, use dim = ceil(sqrt(longest packet length)).
+     * @param packets  Packet images to read; only getHexData() is called on them.
+     * @return pair:
+     *   - first:  the tiled image (dim_total*dim rows and columns),
+     *   - second: the getHexData() result of every packet, in input order.
+     *
+     * Workflow:
+     *   1. Collect the bytes of every packet into `binaries`.
+     *   2. Find the longest byte vector.
+     *   3. If auto_dim is set, replace dim by ceil(sqrt(that length)).
+     *   4. For each byte vector create a dim x dim tile pre-filled with `fill` and copy the
+     *      bytes in row-major order (byte k lands in row k / dim, column k % dim) until
+     *      either the bytes or the tile run out.
+     *   5. dim_total = ceil(sqrt(number of tiles)) is the number of tiles per grid row and
+     *      per grid column.
+     *   6. tile_images() places the tiles row by row and pads the grid with zero tiles.
+     *   7. Return both the tiled matrix and `binaries`, moved into the pair.
+     *
+     * Edge cases:
+     *   - No packets: dim_total is 0 and the matrix is empty.
+     *   - auto_dim with only empty packets: dim becomes 0 and the matrix is empty.
+     *   - The tile side is signed; a negative value is not meaningful and is not handled here.
+     */
+    std::pair<std::vector<std::vector<uint8_t>>, std::vector<std::vector<uint8_t>>>
+    get_matrix_tiled(int fill, int dim, bool auto_dim, const std::vector<heiFIPPacketImage>& packets) {
+        // 1) Collect the bytes of every packet
+        std::vector<std::vector<uint8_t>> binaries;
+        for (const heiFIPPacketImage& packet : packets) {
+            binaries.push_back(packet.getHexData());
+        }
+
+        // 2) Longest byte vector
+        size_t length = 0;
+        for (const auto& b : binaries) {
+            length = std::max(length, b.size());
+        }
+
+        // 3) Optionally derive the tile side from it
+        if (auto_dim) {
+            dim = static_cast<int>(std::ceil(std::sqrt(static_cast<double>(length))));
+        }
+
+        // 4) One dim x dim tile per packet
+        std::vector<std::vector<std::vector<uint8_t>>> result;
+        for (const auto& x : binaries) {
+            // a) Tile starts out completely filled with `fill`
+            std::vector<std::vector<uint8_t>> reshaped(dim, std::vector<uint8_t>(dim, static_cast<uint8_t>(fill)));
+
+            // b) Row-major copy; stops when the bytes or the tile are exhausted
+            size_t k = 0;
+            for (int i = 0; i < dim && k < x.size(); ++i) {
+                for (int j = 0; j < dim && k < x.size(); ++j) {
+                    reshaped[i][j] = x[k++];
+                }
+            }
+            result.push_back(std::move(reshaped));
+        }
+
+        // 5) Tiles per grid row/column (0 when there are no tiles)
+        const size_t length_total = result.size();
+        const unsigned int dim_total = static_cast<unsigned int>(std::ceil(std::sqrt(static_cast<double>(length_total))));
+
+        // 6) Assemble the grid; both results are moved into the pair
+        std::vector<std::vector<uint8_t>> fh = tile_images(result, dim_total, dim);
+        return { std::move(fh), std::move(binaries) };
+    }
+
+    /**
+     * @brief Create a dim x dim tile whose cells are all 0.
+     *
+     * @param dim Side length of the tile.
+     * @return dim rows of dim zero bytes.
+     *
+     * Used by tile_images() for grid slots that have no packet, so that the final
+     * image stays square.
+     */
+    std::vector<std::vector<uint8_t>> npzero(size_t dim) {
+        return std::vector<std::vector<uint8_t>>(dim, std::vector<uint8_t>(dim, static_cast<uint8_t>(0)));
+    }
+
+    /**
+     * @brief Horizontally concatenate two images of equal height.
+     *
+     * @param img1 Left image, as a vector of rows.
+     * @param img2 Right image, as a vector of rows.
+     * @return Image whose row r is img1[r] followed by img2[r]. If either image has no
+     *         rows, the other image is returned as is.
+     *
+     * @throws std::invalid_argument if both images have rows but not the same number of them.
+     *
+     * Used by tile_images() to join the tiles of one grid row side by side.
+     * Row lengths are not checked; only the number of rows is compared.
+     */
+    std::vector<std::vector<uint8_t>> npconcatenate(const std::vector<std::vector<uint8_t>>& img1,
+                                                    const std::vector<std::vector<uint8_t>>& img2)
+    {
+        if (img1.empty()) return img2;
+        if (img2.empty()) return img1;
+
+        if (img1.size() != img2.size()) {
+            throw std::invalid_argument("Images must have the same number of rows to concatenate horizontally.");
+        }
+
+        std::vector<std::vector<uint8_t>> result = img1;
+        for (size_t i = 0; i < result.size(); ++i) {
+            result[i].insert(result[i].end(), img2[i].begin(), img2[i].end());
+        }
+        return result;
+    }
+
+    /**
+     * @brief Arrange per-packet tiles into one square image of cols x cols tiles.
+     *
+     * @param images Tiles to place, in order; each is expected to be dim x dim.
+     * @param cols   Number of tiles per grid row and per grid column.
+     * @param dim    Side length of a tile; used for the zero tiles that pad the grid.
+     * @return The tiled image, cols*dim rows of cols*dim bytes. Empty when cols is 0.
+     *
+     * Workflow:
+     *   1. For each of the `cols` grid rows:
+     *        a. start with an empty row block,
+     *        b. `cols` times: take the next tile (or a zero tile from npzero() once the
+     *           tiles have run out) and join it to the right of the block with
+     *           npconcatenate(),
+     *        c. append the finished block to `rows`.
+     *   2. Stack the blocks of `rows` on top of each other, first block on top.
+     *
+     * Notes:
+     *   - Tiles are placed in row-major order: left to right, then top to bottom.
+     *   - Tiles beyond the first cols*cols are ignored.
+     *   - With cols == 0 no block is built, so the stacking step is skipped and an empty
+     *     image is returned.
+     */
+    std::vector<std::vector<uint8_t>> tile_images(const std::vector<std::vector<std::vector<uint8_t>>>& images,
+                                                  const unsigned int cols, const unsigned int dim)
+    {
+        std::vector<std::vector<std::vector<uint8_t>>> rows;
+        size_t k = 0;  // Index of the next tile to place
+
+        // 1) Build each grid row by joining `cols` tiles horizontally
+        for (size_t i = 0; i < cols; ++i) {
+            std::vector<std::vector<uint8_t>> row;  // Row block under construction
+            for (size_t j = 0; j < cols; ++j) {
+                std::vector<std::vector<uint8_t>> im;
+                if (k < images.size()) {
+                    im = images[k];       // Real tile
+                } else {
+                    im = npzero(dim);     // Padding tile once the packets have run out
+                }
+
+                if (row.empty()) {
+                    row = std::move(im);
+                } else {
+                    row = npconcatenate(row, im);
+                }
+                ++k;
+            }
+            rows.push_back(std::move(row));
+        }
+
+        // 2) Stack the row blocks; with cols == 0 there is no rows[0] to start from
+        if (rows.empty()) return {};
+        std::vector<std::vector<uint8_t>> tiled = std::move(rows[0]);
+        for (size_t i = 1; i < rows.size(); ++i) {
+            tiled.insert(tiled.end(), rows[i].begin(), rows[i].end());
+        }
+        return tiled;
+    }
+};
\ No newline at end of file
diff --git a/heiFIP/images/flow_tiled_fixed.cpp b/heiFIP/images/flow_tiled_fixed.cpp
new file mode 100644
index 0000000..cff317d
--- /dev/null
+++ b/heiFIP/images/flow_tiled_fixed.cpp
@@ -0,0 +1,237 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "heiFIPPacketImage.cpp"
+#include "NetworkTrafficImage.hpp"
+
+/**
+ * @class FlowImageTiledFixed
+ * @brief Builds a fixed-grid tiled image from a sequence of packet images.
+ *
+ * Inherits from NetworkTrafficImage, which provides base logic for traffic-based images.
+ * Responsibilities:
+ * - Convert each packet’s raw bytes into its own dim×dim tile, padding/truncating as needed.
+ * - Arrange all those tiles into a fixed-size grid with `cols` tiles per row and per column.
+ * - Provide getters for both the tiled matrix and the original per-packet binaries.
+ */
+class FlowImageTiledFixed : public NetworkTrafficImage {
+public:
+    /**
+     * @brief Build a cols x cols grid of per-packet tiles.
+     *
+     * @param packets Packet images of one flow, in order; each contributes its getHexData() bytes.
+     * @param dim     Side length of every packet tile.
+     * @param fill    Value (converted to uint8_t) for tile cells a packet is too short to reach.
+     * @param cols    Number of tiles per grid row and per grid column. A value <= 0 yields an
+     *                empty matrix.
+     *
+     * Workflow:
+     *   1. Forward fill and dim to the NetworkTrafficImage base.
+     *   2. Copy `packets` and `cols` into the members.
+     *   3. Run get_matrix_tiled(), which
+     *        a. collects the bytes of every packet,
+     *        b. writes each packet row-major into its own dim x dim tile; bytes beyond
+     *           dim*dim are dropped,
+     *        c. lays the first cols*cols tiles out row by row; unused grid slots are filled
+     *           with all-zero tiles (0, not `fill`).
+     *   4. Keep the returned matrix and byte vectors in the members.
+     *
+     * Packets beyond the first cols*cols do not appear in the image, but they are still
+     * present in get_binaries().
+     */
+    FlowImageTiledFixed(const std::vector<heiFIPPacketImage>& packets, int dim = 16, int fill = 0, int cols = 3)
+        : NetworkTrafficImage(fill, dim), packets(packets), cols(cols)
+    {
+        auto result = get_matrix_tiled(fill, dim, packets);
+        matrix = std::move(result.first);
+        binaries = std::move(result.second);
+    }
+
+    /**
+     * @brief The tiled image.
+     * @return Read-only reference to the matrix; cols*dim rows of cols*dim bytes.
+     */
+    const std::vector<std::vector<uint8_t>>& get_matrix() const {
+        return matrix;
+    }
+
+    /**
+     * @brief Per-packet byte vectors the tiles were built from.
+     * @return Mutable reference to the stored vectors, one entry per input packet.
+     */
+    std::vector<std::vector<uint8_t>>& get_binaries() {
+        return binaries;
+    }
+
+private:
+    std::vector<heiFIPPacketImage> packets;        ///< Input packet images
+    int cols;                                      ///< Number of tiles per grid row/column
+    std::vector<std::vector<uint8_t>> matrix;      ///< Final tiled flow image
+    std::vector<std::vector<uint8_t>> binaries;    ///< Byte vectors obtained from each packet
+
+    /**
+     * @brief Turn every packet into a tile and assemble the tiles into the fixed grid.
+     *
+     * @param fill    Initial value of every tile cell; converted to uint8_t.
+     * @param dim     Side length of every tile.
+     * @param packets Packet images to read; only getHexData() is called on them.
+     * @return pair:
+     *   - first:  the tiled image (cols*dim rows and columns),
+     *   - second: the getHexData() result of every packet, in input order.
+     *
+     * Workflow:
+     *   1. Collect the bytes of every packet into `binaries`.
+     *   2. For each byte vector create a dim x dim tile pre-filled with `fill` and copy the
+     *      bytes in row-major order until either the bytes or the tile run out; collect
+     *      the tiles in `result`.
+     *   3. Clamp the `cols` member to a non-negative grid size and let tile_images() place
+     *      the tiles row by row, padding the grid with zero tiles.
+     *   4. Return both the tiled matrix and `binaries`, moved into the pair.
+     *
+     * Edge cases:
+     *   - cols <= 0: the grid has no slots and the matrix is empty.
+     *   - More than cols*cols packets: the surplus tiles are not placed.
+     *   - No packets: the grid consists of zero tiles only.
+     */
+    std::pair<std::vector<std::vector<uint8_t>>, std::vector<std::vector<uint8_t>>>
+    get_matrix_tiled(int fill, int dim, const std::vector<heiFIPPacketImage>& packets) {
+        std::vector<std::vector<uint8_t>> binaries;
+        // 1) Collect the bytes of every packet
+        for (const heiFIPPacketImage& packet : packets) {
+            binaries.push_back(packet.getHexData());
+        }
+
+        // 2) One dim x dim tile per packet
+        std::vector<std::vector<std::vector<uint8_t>>> result;
+        for (const auto& x : binaries) {
+            // a) Tile starts out completely filled with `fill`
+            std::vector<std::vector<uint8_t>> reshaped(dim, std::vector<uint8_t>(dim, static_cast<uint8_t>(fill)));
+            // b) Row-major copy; stops when the bytes or the tile are exhausted
+            size_t k = 0;
+            for (size_t i = 0; i < static_cast<size_t>(dim) && k < x.size(); ++i) {
+                for (size_t j = 0; j < static_cast<size_t>(dim) && k < x.size(); ++j) {
+                    reshaped[i][j] = x[k++];
+                }
+            }
+            result.push_back(std::move(reshaped));
+        }
+
+        // 3) A negative column count must not wrap around to a huge unsigned grid size
+        const unsigned int grid = cols > 0 ? static_cast<unsigned int>(cols) : 0u;
+        std::vector<std::vector<uint8_t>> fh = tile_images(result, grid, static_cast<unsigned int>(dim));
+        return { std::move(fh), std::move(binaries) };
+    }
+
+    /**
+     * @brief Create a dim x dim tile whose cells are all 0.
+     *
+     * @param dim Side length of the tile.
+     * @return dim rows of dim zero bytes.
+     *
+     * Used by tile_images() for grid slots that have no packet, so that the final
+     * image always covers the full cols x cols grid.
+     */
+    std::vector<std::vector<uint8_t>> npzero(size_t dim) {
+        return std::vector<std::vector<uint8_t>>(dim, std::vector<uint8_t>(dim, static_cast<uint8_t>(0)));
+    }
+
+    /**
+     * @brief Horizontally concatenate two images of equal height.
+     *
+     * @param img1 Left image, as a vector of rows.
+     * @param img2 Right image, as a vector of rows.
+     * @return Image whose row r is img1[r] followed by img2[r]. If either image has no
+     *         rows, the other image is returned as is.
+     *
+     * @throws std::invalid_argument if both images have rows but not the same number of them.
+     *
+     * Used by tile_images() to join the tiles of one grid row side by side.
+     * Row lengths are not checked; only the number of rows is compared.
+     */
+    std::vector<std::vector<uint8_t>> npconcatenate(const std::vector<std::vector<uint8_t>>& img1,
+                                                    const std::vector<std::vector<uint8_t>>& img2)
+    {
+        if (img1.empty()) return img2;
+        if (img2.empty()) return img1;
+
+        if (img1.size() != img2.size()) {
+            throw std::invalid_argument("Images must have the same number of rows to concatenate horizontally.");
+        }
+
+        std::vector<std::vector<uint8_t>> result = img1;
+        for (size_t i = 0; i < result.size(); ++i) {
+            result[i].insert(result[i].end(), img2[i].begin(), img2[i].end());
+        }
+        return result;
+    }
+
+    /**
+     * @brief Arrange per-packet tiles into one fixed-grid image of cols x cols tiles.
+     *
+     * @param images Tiles to place, in order; each is expected to be dim x dim.
+     * @param cols   Number of tiles per grid row and per grid column.
+     * @param dim    Side length of a tile; used for the zero tiles that pad the grid.
+     * @return The tiled image, cols*dim rows of cols*dim bytes. Empty when cols is 0.
+     *
+     * Workflow:
+     *   1. For each of the `cols` grid rows:
+     *        a. start with an empty row block,
+     *        b. `cols` times: take the next tile (or a zero tile from npzero() once the
+     *           tiles have run out) and join it to the right of the block with
+     *           npconcatenate(),
+     *        c. append the finished block to `rows`.
+     *   2. Stack the blocks of `rows` on top of each other, first block on top.
+     *
+     * Notes:
+     *   - Tiles are placed in row-major order: left to right, then top to bottom.
+     *   - Tiles beyond the first cols*cols are ignored.
+     *   - With cols == 0 no block is built, so the stacking step is skipped and an empty
+     *     image is returned.
+     */
+    std::vector<std::vector<uint8_t>> tile_images(const std::vector<std::vector<std::vector<uint8_t>>>& images,
+                                                  const unsigned int cols, const unsigned int dim)
+    {
+        std::vector<std::vector<std::vector<uint8_t>>> rows;
+        size_t k = 0;  // Index of the next tile to place
+
+        // 1) Build each grid row by joining `cols` tiles horizontally
+        for (size_t i = 0; i < cols; ++i) {
+            std::vector<std::vector<uint8_t>> row;  // Row block under construction
+            for (size_t j = 0; j < cols; ++j) {
+                std::vector<std::vector<uint8_t>> im;
+                if (k < images.size()) {
+                    im = images[k];       // Real tile
+                } else {
+                    im = npzero(dim);     // Padding tile when fewer than cols*cols packets exist
+                }
+
+                if (row.empty()) {
+                    row = std::move(im);
+                } else {
+                    row = npconcatenate(row, im);
+                }
+                ++k;
+            }
+            rows.push_back(std::move(row));
+        }
+
+        // 2) Stack the row blocks; with cols == 0 there is no rows[0] to start from
+        if (rows.empty()) return {};
+        std::vector<std::vector<uint8_t>> tiled = std::move(rows[0]);
+        for (size_t i = 1; i < rows.size(); ++i) {
+            tiled.insert(tiled.end(), rows[i].begin(), rows[i].end());
+        }
+        return tiled;
+    }
+};
\ No newline at end of file
diff --git a/heiFIP/images/markov_chain.cpp b/heiFIP/images/markov_chain.cpp
new file mode 100644
index 0000000..047a410
--- /dev/null
+++ b/heiFIP/images/markov_chain.cpp
@@ -0,0 +1,270 @@
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <stdexcept>
+#include <vector>
+#include "heiFIPPacketImage.cpp"
+#include "NetworkTrafficImage.hpp"
+
+/**
+ * @class MarkovTransitionMatrix
+ * @brief Base class for computing a normalized, grayscale Markov transition matrix from a sequence of symbols.
+ *
+ * Responsibilities:
+ * - Given a 1D vector of “transitions” (values in [0..15]), count the transitions between consecutive symbols.
+ * - Normalize each row of the count matrix so that probabilities sum to 1, then scale to [0..255].
+ * - Return the resulting 16×16 matrix of uint8_t intensities.
+ */
+class MarkovTransitionMatrix : public NetworkTrafficImage {
+public:
+    /**
+     * @brief Compute a 16×16 Markov transition matrix from a sequence of 4-bit symbols.
+     *
+     * @param transitions Symbol sequence of length L. Each adjacent pair
+     *                    (transitions[k], transitions[k+1]) = (i, j) adds one to count[i][j].
+     * @return 2D vector of size [16][16]. Cell [i][j] is count[i][j] divided by the total of
+     *         row i, multiplied by 255 and truncated to uint8_t. Rows whose total is zero stay
+     *         all zeros, so an input with fewer than two symbols yields an all-zero matrix.
+     * @throws std::invalid_argument if a symbol that takes part in a transition is > 15.
+     *
+     * Implementation notes:
+     *  - Counts are kept in a separate size_t table rather than in the uint8_t result cells,
+     *    because a uint8_t cell wraps around to 0 on its 256th increment.
+     *  - Symbols are range-checked before they are used as indices, because a value above
+     *    15 would otherwise address memory outside the 16×16 table.
+     *
+     * Why:
+     *  - Captures the first-order Markov distribution between successive 4-bit values.
+     *  - Scaling to 0–255 yields a grayscale image suitable for image-based analysis.
+     */
+    std::vector<std::vector<uint8_t>> transition_matrix(const std::vector<uint8_t>& transitions) {
+        const size_t n = 16;
+
+        // 1) Count transitions between consecutive symbols in a wide integer type
+        std::vector<std::vector<size_t>> counts(n, std::vector<size_t>(n, 0));
+        for (size_t k = 0; k + 1 < transitions.size(); ++k) {
+            const size_t i = transitions[k];
+            const size_t j = transitions[k + 1];
+            if (i >= n || j >= n) {
+                throw std::invalid_argument("transition_matrix: symbols must be in the range 0..15.");
+            }
+            ++counts[i][j];
+        }
+
+        // 2) Normalize each row to probabilities and scale to [0..255]
+        std::vector<std::vector<uint8_t>> scaledMatrix(n, std::vector<uint8_t>(n, 0));
+        for (size_t i = 0; i < n; ++i) {
+            double sum = 0.0;
+            for (size_t count : counts[i]) {
+                sum += static_cast<double>(count);
+            }
+            if (sum <= 0.0) {
+                continue;  // No transitions leave symbol i: the row stays all zeros
+            }
+            for (size_t j = 0; j < n; ++j) {
+                const double prob = static_cast<double>(counts[i][j]) / sum;
+                const double scaled = prob * 255.0;
+                // The cast truncates toward zero; the clamp keeps the value in uint8_t range
+                scaledMatrix[i][j] = static_cast<uint8_t>(std::clamp(scaled, 0.0, 255.0));
+            }
+        }
+
+        return scaledMatrix;
+    }
+};
+
+/**
+ * @class MarkovTransitionMatrixFlow
+ * @brief Builds a larger image by computing a Markov transition matrix for each packet in a flow,
+ * then arranging all 16×16 matrices into a fixed grid of tiles.
+ *
+ * Inherits from MarkovTransitionMatrix to leverage the transition_matrix() method.
+ * Responsibilities:
+ * - For each heiFIPPacketImage in `packets`, extract its 4-bit bit array and compute a 16×16 matrix.
+ * - Tile all per-packet matrices into a grid with `cols` tiles per row and column.
+ * - Store the final tiled matrix as a single 2D vector, accessible via get_matrix().
+ */
+class MarkovTransitionMatrixFlow : public MarkovTransitionMatrix {
+public:
+ /**
+ * @brief Constructor: compute and tile per-packet Markov matrices.
+ *
+ * @param packets Vector of heiFIPPacketImage, each representing one packet in the flow.
+ * @param cols Number of tiles per row and per column in the final grid (grid is cols×cols).
+ *
+ * Workflow:
+ * 1. Store `packets` and `cols`.
+ * 2. For each packet in `packets`:
+ * a. Call packet.bit_array() to get a vector of 4-bit values.
+ * b. Pass that vector to transition_matrix() to get a 16×16 grayscale matrix.
+ * c. Append that 16×16 matrix to a local list `result`.
+ * 3. Call tile_images(result, cols, 16) to arrange all 16×16 matrices into one large image:
+ * - Creates a cols×cols grid of 16×16 tiles.
+ * - If fewer than cols² matrices, fill missing spots with zero tiles (npzero).
+ * - Concatenate horizontally then vertically as necessary.
+ * 4. Store the final tiled image in member `matrix`.
+ */
+ MarkovTransitionMatrixFlow(const std::vector<heiFIPPacketImage>& packets, uint cols = 4)
+     : packets(packets), cols(cols)
+ {
+     // A 0×0 grid has no tiles: leave `matrix` empty rather than calling tile_images(),
+     // which starts from rows[0] of its (then empty) list of grid rows.
+     if (cols == 0) return;
+     std::vector<std::vector<std::vector<uint8_t>>> result;
+     result.reserve(packets.size());  // One 16×16 matrix per packet
+     for (const heiFIPPacketImage& packet : packets) {
+         result.push_back(transition_matrix(packet.bit_array()));
+     }
+     matrix = tile_images(result, cols, 16);  // Tiles row-major, zero-padding unused slots
+ }
+
+ /// Accessor for the final tiled flow image
+ const std::vector<std::vector<uint8_t>>& get_matrix() const {
+     return matrix;  // Read-only reference to the tiled image assigned in the constructor
+ }
+
+private:
+ std::vector packets; ///< Each packet in the flow
+ uint cols; ///< Number of tiles per row/column
+ MarkovTransitionMatrix transitionMatrix; ///< Base class instance (not strictly necessary)
+ std::vector> matrix; ///< Final tiled image composed of 16×16 tiles
+
+ /**
+ * @brief Create a 16×16 tile filled with zeros (if a packet’s matrix is missing).
+ *
+ * @param dim Tile dimension (16 for Markov matrices).
+ * @return 2D vector of size [dim][dim], all zeros.
+ */
+ std::vector<std::vector<uint8_t>> npzero(size_t dim) {
+     return std::vector<std::vector<uint8_t>>(dim, std::vector<uint8_t>(dim, 0));  // dim rows of dim zeros
+ }
+
+ /**
+ * @brief Horizontally concatenate two same-height images (2D arrays).
+ *
+ * @param img1 First image: vector of rows, each row is a vector.
+ * @param img2 Second image: must have the same number of rows as img1.
+ * @return Concatenated image: each row is img1[row] followed by img2[row].
+ *
+ * Throws:
+ * - std::invalid_argument if img1 and img2 have different heights.
+ *
+ * Why:
+ * - Used in tile_images() to join 16×16 tiles side by side when constructing each grid row.
+ */
+ std::vector<std::vector<uint8_t>> npconcatenate(const std::vector<std::vector<uint8_t>>& img1,
+                                                 const std::vector<std::vector<uint8_t>>& img2)
+ {
+     // An image with no rows acts as the identity: the other operand is returned unchanged.
+     if (img1.empty()) return img2;
+     if (img2.empty()) return img1;
+     if (img1.size() != img2.size()) {
+         throw std::invalid_argument("Images must have the same number of rows to concatenate horizontally.");
+     }
+     // Start from a copy of img1 and append the matching img2 row to each of its rows.
+     std::vector<std::vector<uint8_t>> result = img1;
+     for (size_t i = 0; i < result.size(); ++i) {
+         result[i].insert(result[i].end(), img2[i].begin(), img2[i].end());
+     }
+     return result;
+ }
+
+ /**
+ * @brief Arrange a list of 16×16 tiles into one large square image of size [cols*dim][cols*dim].
+ *
+ * @param images 3D vector: [numTiles][16][16], each a 16×16 grayscale matrix.
+ * @param cols Number of tiles per row/column in the final grid.
+ * @param dim Dimension of each tile (16).
+ * @return 2D vector of size [cols*dim][cols*dim], the tiled image.
+ *
+ * Workflow:
+ * 1. Initialize an empty vector `rows` to hold each combined grid-row.
+ * 2. Set k = 0 to track current tile index.
+ * 3. For each row i in [0..cols−1]:
+ * a. Initialize an empty 16×0 “row” block.
+ * b. For j in [0..cols−1]:
+ * - If k < images.size(), let im = images[k]; else use a zero tile npzero(dim).
+ * - If row is empty, set row = im; else row = npconcatenate(row, im).
+ * - Increment k.
+ * c. Append the completed row block (size = dim rows, width = cols*dim) to `rows`.
+ * 4. Initialize `tiled` = rows[0].
+ * 5. For each subsequent row i in [1..rows.size()−1], append rows[i] to the bottom of `tiled`.
+ * 6. Return `tiled`.
+ *
+ * Why:
+ * - Ensures that if there are fewer than cols² packets, the missing grid slots are zero-filled tiles,
+ * preserving a square final image of consistent size.
+ */
+ std::vector> tile_images(const std::vector>>& images,
+ const uint cols, const uint dim)
+ {
+ std::vector>> rows;
+ size_t k = 0; // Tracks which tile index we’re on
+
+ // 1) Build each row of the tile grid
+ for (size_t i = 0; i < cols; ++i) {
+ std::vector> row; // Combined row of tiles
+ for (size_t j = 0; j < cols; ++j) {
+ std::vector> im;
+ if (k < images.size()) {
+ im = images[k]; // Use actual 16×16 tile
+ } else {
+ im = npzero(dim); // Use zero tile if no more images
+ }
+
+ if (row.empty()) {
+ row = std::move(im);
+ } else {
+ row = npconcatenate(row, im);
+ }
+ ++k;
+ }
+ rows.push_back(std::move(row));
+ }
+
+ // 2) Stack all rows vertically to form final tiled image
+ std::vector