diff --git a/.bumpversion.cfg b/.bumpversion.cfg deleted file mode 100644 index cb8dd72..0000000 --- a/.bumpversion.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[bumpversion] -current_version = 0.2.9 -commit = True -tag = True - -[bumpversion:file:setup.py] \ No newline at end of file diff --git a/.github/workflows/build_test_linux.yml b/.github/workflows/build_test_linux.yml index 0dccbbc..90c449a 100644 --- a/.github/workflows/build_test_linux.yml +++ b/.github/workflows/build_test_linux.yml @@ -1,65 +1,70 @@ -name: build_test_linux +name: Linux Build + on: push: - branches: - - main + branches: [ main ] pull_request: - branches: - - main + branches: [ main ] + jobs: - build_test_linux: - name: ${{ matrix.python-version }} on ${{ matrix.os }} + build: runs-on: ${{ matrix.os }} strategy: - fail-fast: false matrix: - os: ["ubuntu-latest"] - python-version: ["pypy-3.9", - "pypy-3.8", - # "pypy-3.7", - "3.11", - "3.10", - "3.9", - "3.8", - # "3.7" - ] + os: [ubuntu-latest] + + env: + CMAKE_BUILD_TYPE: Release steps: - - uses: actions/checkout@v3 - with: - submodules: recursive - - name: Setup py-${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - # Caching improves build time, we use pythonLocation to cache everything including wheels to avoid building - # wheels at each build (pandas/Pypy is extremely time consuming) - # sed replacement is performed to rectify PyPy path which ends with /bin - # cache key takes into account the Python version of the runner to avoid version mismatch on updates. - - name: Get pip cache path - id: get-pip-path - run: | - id=$(echo ${{ env.pythonLocation }} | sed 's/\/bin//g') - echo "::set-output name=id::$id" + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up CMake + uses: lukka/get-cmake@v3.29.2 + + - name: Cache APT packages + uses: actions/cache@v4 + with: + path: | + /var/cache/apt/archives + key: ${{ runner.os }}-apt-${{ hashFiles('**/CMakeLists.txt') }} + restore-keys: | + ${{ runner.os }}-apt-${{ hashFiles('**/CMakeLists.txt') }} - - name: Pip cache - uses: actions/cache@v3 - id: pip-cache - with: - path: ${{ steps.get-pip-path.outputs.id }} - key: ${{ steps.get-pip-path.outputs.id }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }} + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + build-essential \ + libssl-dev \ + libopencv-dev \ + libpcap-dev \ + libpthread-stubs0-dev \ + git - - name: Install requirements - if: steps.pip-cache.outputs.cache-hit != 'true' - run: | - python -m pip install --upgrade pip - python -m pip install -r requirements.txt + - name: Install precompiled PcapPlusPlus + run: | + curl -LO https://github.com/seladb/PcapPlusPlus/releases/download/v25.05/pcapplusplus-25.05-ubuntu-22.04-gcc-11.4.0-x86_64.tar.gz + tar -xzf pcapplusplus-25.05-ubuntu-22.04-gcc-11.4.0-x86_64.tar.gz + cd pcapplusplus-25.05-ubuntu-22.04-gcc-11.4.0-x86_64 + sudo cp -r include/* /usr/local/include/ + sudo cp -r lib/* /usr/local/lib/ + sudo ldconfig - - name: Build - run: | - python -m pip install . + - name: Configure and Build + run: | + cd heiFIP/ - - name: Test - if: startsWith(matrix.os, 'ubuntu') && !startsWith(matrix.python-version, '3.10') - run: | - python -m pytest tests.py + mkdir -p build && cd build + + cmake .. 
\ + -DCMAKE_BUILD_TYPE=Release \ + -DUSE_MANUAL_PCAPPLUSPLUS=ON \ + -DPcapPlusPlus_INCLUDE_DIRS="/usr/local/include/pcapplusplus/" \ + -DPcapPlusPlus_LIBRARIES="/usr/local/lib/libCommon++.a;/usr/local/lib/libPacket++.a;/usr/local/lib/libPcap++.a" \ + -DUSE_MANUAL_OPENSSL=ON \ + -DOPENSSL_INCLUDE_DIR="/usr/include/openssl" \ + -DOPENSSL_CRYPTO_LIBRARY="/usr/lib/x86_64-linux-gnu/libcrypto.a" + + make -j$(nproc) diff --git a/.github/workflows/build_test_macos.yml b/.github/workflows/build_test_macos.yml index c155e55..d8e1d1a 100644 --- a/.github/workflows/build_test_macos.yml +++ b/.github/workflows/build_test_macos.yml @@ -1,67 +1,51 @@ -name: build_test_macos +name: MacOS Build + on: push: - branches: - - main + branches: [ main ] pull_request: - branches: - - main + branches: [ main ] + jobs: - build_test_macos: - name: ${{ matrix.python-version }} on ${{ matrix.os }} + build: runs-on: ${{ matrix.os }} strategy: - fail-fast: false matrix: - os: ["macos-12"] - python-version: ["pypy-3.9", - "pypy-3.8", - # "pypy-3.7", - # "3.11", - # Not available yet. - "3.10", - "3.9", - "3.8", - # "3.7" - ] + os: [macos-latest] + + env: + CMAKE_BUILD_TYPE: Release steps: - - uses: actions/checkout@v3 - with: - submodules: recursive - - name: Setup py-${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - # Caching improves build time, we use pythonLocation to cache everything including wheels to avoid building - # wheels at each build (pandas/Pypy is extremely time consuming) - # sed replacement is performed to rectify PyPy path which ends with /bin - # cache key takes into account the Python version of the runner to avoid version mismatch on updates. - - name: Get pip cache path - id: get-pip-path - run: | - id=$(echo ${{ env.pythonLocation }} | sed 's/\/bin//g') - echo "::set-output name=id::$id" + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up CMake + uses: lukka/get-cmake@v3.29.2 + + - name: Set up Homebrew cache + uses: actions/cache@v4 + with: + path: | + /Users/runner/Library/Caches/Homebrew + /usr/local/Homebrew/Library/Taps + /usr/local/Cellar + key: ${{ runner.os }}-brew-${{ hashFiles('**/CMakeLists.txt') }} + restore-keys: | + ${{ runner.os }}-brew-${{ hashFiles('**/CMakeLists.txt') }} - - name: Pip cache - uses: actions/cache@v3 - id: pip-cache - with: - path: ${{ steps.get-pip-path.outputs.id }} - key: ${{ steps.get-pip-path.outputs.id }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements.txt') }} + - name: Install dependencies + run: | + brew update + brew install openssl opencv libpcap cmake git pcapplusplus - - name: Install requirements - if: steps.pip-cache.outputs.cache-hit != 'true' - run: | - python -m pip install --upgrade pip - python -m pip install -r requirements.txt + - name: Configure and Build + run: | + cd heiFIP/ - - name: Build - run: | - python -m pip install . + mkdir -p build && cd build - - name: Test - # On other versions then 3.9, we test only. (without coverage generation) - if: startsWith(matrix.os, 'macos') && !startsWith(matrix.python-version, '3.9') && !startsWith(github.ref, 'refs/tags/') - run: | - python -m pytest tests.py + cmake .. 
\ + -DCMAKE_BUILD_TYPE=Release + + make diff --git a/.github/workflows/build_test_windows.yml b/.github/workflows/build_test_windows.yml deleted file mode 100644 index 30b2c91..0000000 --- a/.github/workflows/build_test_windows.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: build_test_windows -on: - push: - branches: - - main - pull_request: - branches: - - main -jobs: - build_test_windows: - name: ${{ matrix.python-version }} on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: ["windows-latest"] - python-version: ["pypy-3.9", - "pypy-3.8", - # "pypy-3.7", does work atm. - "3.11", - "3.9", - # "3.8", - # "3.7" - ] - - steps: - - uses: actions/checkout@v3 - with: - submodules: recursive - - name: Setup py-${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - - name: Setup msys2 - uses: msys2/setup-msys2@v2 - with: - msystem: MINGW64 - update: true - install: git unzip mingw-w64-x86_64-libjpeg-turbo mingw-w64-x86_64-zlib mingw-w64-x86_64-libtiff mingw-w64-x86_64-freetype mingw-w64-x86_64-lcms2 mingw-w64-x86_64-libwebp mingw-w64-x86_64-openjpeg2 mingw-w64-x86_64-libimagequant mingw-w64-x86_64-libraqm mingw-w64-x86_64-gcc mingw-w64-x86_64-python3 mingw-w64-x86_64-python3-pip mingw-w64-x86_64-python3-setuptools - - - name: Install requirements - run: | - python -m pip install --upgrade pip - python -m pip install -r requirements.txt - - - name: Build - env: - MSYSTEM: MINGW64 - MSYS2_PATH: D:/a/_temp/msys64 - run: | - python -m pip install . - - - name: Test - # On other versions then 3.9, we test only. (without coverage generation) - if: startsWith(matrix.os, 'windows') && !startsWith(matrix.python-version, '3.9') && !startsWith(github.ref, 'refs/tags/') - run: | - python -m pytest tests.py diff --git a/.github/workflows/build_wheel_publish.yml b/.github/workflows/build_wheel_publish.yml deleted file mode 100644 index 73dbb04..0000000 --- a/.github/workflows/build_wheel_publish.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: build_wheel_publish - -on: - release: - types: [created] - -jobs: - build_wheels: - name: Build wheels - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Build package - run: python -m build - - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/README.md b/README.md index 4d4ce4a..672bf79 100644 --- a/README.md +++ b/README.md @@ -10,32 +10,10 @@ Currently, we only support **offline** network data analysis. However, we plan to adapt our library to support **online** network data too to enable live-probing of models. - - - - - - - - - @@ -63,9 +41,6 @@ However, we plan to adapt our library to support **online** network data too to MacOS WorkFlows - - Windows WorkFlows -
Live Notebook - - live notebook - -
Latest Release - - latest release - -
Supported Versions - - python3 - - - pypy3 - + Version 1.0
@@ -99,114 +74,135 @@ The idea to create heiFIP came from working with Deep Learning approaches to cla - **Max packets per flow** allows you to specify the maximum number of packets per flow. If the total number of packets is too great, the remaining images are discarded. - **Packet Image** converts a single packet into an image. - **Markov Transition Matrix Image**: converts a packet or a flow into a Markov representation. -- **Header** processing allows you to customize header fields of different protocols. It aims to remove biasing fields. For more details look into [header.py](https://github.com/stefanDeveloper/heiFIP/blob/main/heifip/plugins/header.py) +- **Header** processing allows you to customize header fields of different protocols. It aims to remove biasing fields. - **Remove Payload** options allows you to only work on header data. -- **Fast and flexible**: We rely on [Scapy](https://github.com/secdev/scapy) for our sniffing and header processing. Image preparation is based on raw bytes. +- **Fast and flexible**: The main image precessing is in raw bytes inside the image classes while for the header preprocessing is PcapPlusPlus is used. - **Machine learning orientation**: heiFIP aims to make Deep Learning approaches using network data as images reproducible and deployable. Using heiFIP as a common framework enables researches to test and verify their models. ## Examples | Image Type | Description | Example | |------------|-------------|---------| -| Packet | Converts a single packet into a square image. Size depends on the total length | ![SMB Connection](https://raw.githubusercontent.com/stefanDeveloper/heiFIP/main/examples/packet.png?raw=true) | -| Flow | Converts a flow packet into a square image | ![SMB Connection](https://raw.githubusercontent.com/stefanDeveloper/heiFIP/main/examples/flow-tiled.png?raw=true) | -| Markov Transition Matrix Packet | Converts a packet into a Markov Transition Matrix. Size is fixed to 16x16. | ![SMB Connection](https://raw.githubusercontent.com/stefanDeveloper/heiFIP/main/examples/markov-packet.png?raw=true) | -| Markov Transition Matrix Flow | Converts a flow into a Markov Transition Matrix. It squares the image based on the number of packets | ![SMB Connection](https://raw.githubusercontent.com/stefanDeveloper/heiFIP/main/examples/markov-flow.png?raw=true) | +| Packet | Converts a single packet into a square image. Size depends on the total length | ![SMB Connection](https://raw.githubusercontent.com/stefanDeveloper/heiFIP/heiFIP-cpp/examples/packet.png?raw=true) | +| Flow | Converts a flow packet into a square image | ![SMB Connection](https://raw.githubusercontent.com/stefanDeveloper/heiFIP/heiFIP-cpp/examples/flow-tiled.png?raw=true) | +| Markov Transition Matrix Packet | Converts a packet into a Markov Transition Matrix. Size is fixed to 16x16. | ![SMB Connection](https://raw.githubusercontent.com/stefanDeveloper/heiFIP/heiFIP-cpp/examples/markov-packet.png?raw=true) | +| Markov Transition Matrix Flow | Converts a flow into a Markov Transition Matrix. It squares the image based on the number of packets | ![SMB Connection](https://raw.githubusercontent.com/stefanDeveloper/heiFIP/heiFIP-cpp/examples/markov-flow.png?raw=true) | -## Getting Started +## Requirements -Install our package using PyPi +* **C++ Compiler**: GCC ≥ 9.0, Clang ≥ 10, or MSVC 2019 with C++17 support. +* **CMake**: Version ≥ 3.14 +* **PcapPlusPlus**: Installed system‑wide or built locally. 
([https://github.com/seladb/PcapPlusPlus](https://github.com/seladb/PcapPlusPlus)) +* **OpenSSL**: For SHA256 hashing (libcrypto). +* **OpenCV**: Version ≥ 4.0 for image handling and saving (e.g., cv::imwrite). +* **pthread**: POSIX threads (Linux/macOS). Windows users require linking against `-lws2_32` and `-lIPHLPAPI`. +* **libpcap**: PCAP Support (Linux/macOS) -```sh -pip install heifip -``` -Now, you can use the integrate CLI: +Optional: -```sh -> fip -Usage: fip [OPTIONS] COMMAND [ARGS]... +* **getopt\_long**: For CLI parsing (provided by libc on Linux/macOS). Windows may need `getopt` replacement. -Options: - --version Show the version and exit. - -h, --help Show this message and exit. +## Building from source -Commands: - extract -``` -To extract images from PCAPs, we currently split the command into flow and packet: - -```sh -> fip extract -Starting FlowImageProcessor CLI -Usage: fip extract [OPTIONS] COMMAND [ARGS]... - -Options: - -h, --help Show this message and exit. - -Commands: - flow - packet - -# Show help information -> fip extract [flow/packet]-h -Starting FlowImageProcessor CLI -Usage: fip extract flow [OPTIONS] - -Options: - -w, --write PATH Destination file path, stores result [required] - -r, --read PATH [required] - -t, --threads INTEGER Number of parallel threads that can be used - [default: 4] - --preprocess [NONE|HEADER] Applies a preprocessing to the input data: none: - No preprocessing payload: Only payload data is - used header: Preprocesses headers - (DNS,HTTP,IP,IPv6,TCP,UDP supported) to remove - some biasing data [default: NONE] - --min_im_dim INTEGER Minimum dim ouput images need to have, 0=No - minimum dim [default: 0] - --max_im_dim INTEGER Maximum dim ouput images can have, 0=No maximum - dim [default: 0] - --remove_duplicates Within a single output folder belonging to a - single input folder no duplicate images will be - produced if two inputs lead to the same image - --min_packets INTEGER Minimum packets that a FlowImage needs to have, - 0=No minimum packets per flow [default: 0] - --max_packets INTEGER Minimum packets that a FlowImage needs to have, - 0=No minimum packets per flow [default: 0] - --append - --tiled - --width INTEGER [default: 128] - -h, --help Show this message and exit. - -> fip extract flow -r /PATH/PCAPs -w /PATH/IMAGES -``` +```bash +# Clone this repo +git clone https://github.com/yourusername/heiFIPCpp.git +cd heiFIP/heiFIP/ -Import FIPExtractor to run it inside your program: +# Create build directory +mkdir build && cd build -```python -extractor = FIPExtractor() -img = extractor.create_image('./test/pcaps/dns/dns-binds.pcap') -extractor.save_image(img, './test/pcaps/dns/dns-binds.pcap') -``` +cmake .. -### Building from source +# We highly recommend that locating necessary dependencies is done manually since espically +# Pcap Plus Plus is often not installed in standard locations. While we do use scripts to automatically detect +# the necessary dependencies if those scripts fail you can specify the paths to the include directories of the header +# files aswell as the paths to libaries manually like so. Also do not forget to specify all three of Pcap Plus Plus's +# libaries libCommon++, libPacket++, libPcap++. For OpenCV doing this manually while possible, due to number of links +# necessary, is very difficult. Since OpenCV is configured for Cmake anyway this is unnecessary anyway. 
When using macOS,
+# be very careful that the linked libraries are not Intel (x86_64) bottles: if that happens, the code is still
+# compiled as ARM64 but dynamically linked against x86_64 .dylibs, which forces macOS to fall back to Rosetta 2
+# translation at runtime and incurs significant overhead. So if possible, use a Linux distribution.
+
+cmake .. \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DUSE_MANUAL_PCAPPLUSPLUS=ON \
+    -DPcapPlusPlus_INCLUDE_DIRS="/opt/homebrew/Cellar/pcapplusplus/25.05/include" \
+    -DPcapPlusPlus_LIBRARIES="/opt/homebrew/Cellar/pcapplusplus/25.05/lib/libCommon++.a\;/opt/homebrew/Cellar/pcapplusplus/25.05/lib/libPacket++.a\;/opt/homebrew/Cellar/pcapplusplus/25.05/lib/libPcap++.a" \
+    -DUSE_MANUAL_OPENSSL=ON \
+    -DOPENSSL_INCLUDE_DIR="/opt/homebrew/opt/openssl@3/include" \
+    -DOPENSSL_CRYPTO_LIBRARY="/opt/homebrew/opt/openssl@3/lib/libcrypto.a"
+
+# Compile
+make -j$(nproc)
+
+# or
+cmake --build build
+
+# The executable 'heiFIPCpp' will be produced in build/
+```
+
+## Getting Started
+
+After installation, the command line interface can be used to extract images from PCAP files with the following command:
+```bash
+./heiFIPCpp \
+    --name HelloHeiFIP \
+    --input /path/to/capture.pcap \
+    --output /path/to/outdir \
+    --threads 4 \
+    --processor HEADER \
+    --mode FlowImageTiledAuto \
+    --dim 16 \
+    --append \
+    --fill 0 \
+    --min-dim 10 \
+    --max-dim 2000 \
+    --min-pkts 10 \
+    --max-pkts 100 \
+    --remove-dup
+```
+### Options
+| Flag | Description |
+| ------------------- | -------------------------------------------------------------- |
+| `-i`, `--input` | Input PCAP file path |
+| `-o`, `--output` | Output directory |
+| `-t`, `--threads` | Number of worker threads (default: 1) |
+| `-p`, `--processor` | Preprocessing: `NONE` or `HEADER` |
+| `-m`, `--mode` | Image type: `PacketImage`, `FlowImage`, `FlowImageTiledFixed`, |
+| | `FlowImageTiledAuto`, `MarkovTransitionMatrixFlow`, |
+| | `MarkovTransitionMatrixPacket` |
+| `--dim` | Base dimension for image (e.g. width/height in pixels) |
+| `--fill` | Fill or padding value (0–255) |
+| `--cols` | Number of columns (for tiled/fixed or Markov flow) |
+| `--auto-dim` | Enable auto‑dimension selection (bool) |
+| `--append` | Append mode for `FlowImage` (bool) |
+| `--min-dim` | Minimum allowed image dimension |
+| `--max-dim` | Maximum allowed image dimension |
+| `--min-pkts` | Minimum packets per flow (for tiled/flow modes) |
+| `--max-pkts` | Maximum packets per flow |
+| `--remove-dup` | Remove duplicate flows/packets by hash |
+| `--name` | Filename of the processed image |
+| `-h`, `--help` | Show this help message |
+
+## Extending
+
+To add a new image type (a sketch of the first two steps follows this list):
+
+1. Define a new `ImageArgs` struct in `extractor.cpp`.
+2. Extend the `ImageType` enum.
+3. Implement the conversion in `PacketProcessor::createImageFromPacket()`.
+4. Update the CLI `--mode` parser to include your new type.
+
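+A minimal sketch of steps 1 and 2 (the `ByteHistogramImage` mode and its argument struct are purely illustrative and do not exist in heiFIP):
+
+```cpp
+// 1. Hypothetical new argument struct in extractor.cpp
+struct ByteHistogramImageArgs {
+    size_t bins;  // number of histogram bins per packet
+    size_t fill;  // padding value for unused cells
+};
+
+// 2. Extend the ImageType enum (and append ByteHistogramImageArgs to ImageArgsVariant)
+enum class ImageType {
+    FlowImage,
+    FlowImageTiledFixed,
+    FlowImageTiledAuto,
+    PacketImage,
+    MarkovTransitionMatrixFlow,
+    MarkovTransitionMatrixPacket,
+    ByteHistogramImage  // new mode
+};
+```
+
+Steps 3 and 4 then add a matching `case` to the image-creation switch and a new `--mode` string to the CLI parser.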
+
+---
-## Credits
+### Publications that use heiFIP
-[NFStream](https://github.com/nfstream/nfstream) for the inspiration of the `README.md` and workflow testing.
+
+- S. Machmeier, M. Hoecker, V. Heuveline, "Explainable Artificial Intelligence for Improving a Session-Based Malware Traffic Classification with Deep Learning", in 2023 IEEE Symposium Series on Computational Intelligence (SSCI), Mexico City, Mexico, 2023. https://doi.org/10.1109/SSCI52147.2023.10371980
+- S. Machmeier, M. Trageser, M. Buchwald, and V. Heuveline, "A generalizable approach for network flow image representation for deep learning", in 2023 7th Cyber Security in Networking Conference (CSNet), Montréal, Canada, 2023. https://doi.org/10.1109/CSNet59123.2023.10339761

### Authors

@@ -214,6 +210,7 @@ The following people contributed to heiFIP:

- [Stefan Machmeier](https://github.com/stefanDeveloper): Creator
- [Manuel Trageser](https://github.com/maxi99manuel99): Header extraction and customization.
+- [Henri Rebitzky](https://github.com/HenriRebitzky): Conversion from Python to C++

## License
diff --git a/examples/.DS_Store b/examples/.DS_Store
new file mode 100644
index 0000000..5008ddf
Binary files /dev/null and b/examples/.DS_Store differ
diff --git a/examples/flow-tiled.png b/examples/flow-tiled.png
index 8eacaf5..c5a40bb 100644
Binary files a/examples/flow-tiled.png and b/examples/flow-tiled.png differ
diff --git a/examples/markov-flow.png b/examples/markov-flow.png
index f6d4667..f943d24 100644
Binary files a/examples/markov-flow.png and b/examples/markov-flow.png differ
diff --git a/examples/markov-packet.png b/examples/markov-packet.png
index 63f6c67..56d807c 100644
Binary files a/examples/markov-packet.png and b/examples/markov-packet.png differ
diff --git a/examples/packet.png b/examples/packet.png
index d08f7b5..7475139 100644
Binary files a/examples/packet.png and b/examples/packet.png differ
diff --git a/heiFIP/CMakeLists.txt b/heiFIP/CMakeLists.txt
new file mode 100644
index 0000000..5829da7
--- /dev/null
+++ b/heiFIP/CMakeLists.txt
@@ -0,0 +1,97 @@
+cmake_minimum_required(VERSION 3.14)
+project(heiFIP)
+
+set(CMAKE_CXX_STANDARD 20)
+
+# === 1. Default to Release mode ===
+if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
+endif()
+
+# === 2. Maximum optimization flags for Release ===
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -funroll-loops -flto=auto -DNDEBUG")
+set(CMAKE_C_FLAGS_RELEASE "-O3 -march=native -funroll-loops -flto=auto -DNDEBUG")
+
+# === 3.
Manual override support === + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + +# --- OpenSSL --- +option(USE_MANUAL_OPENSSL "Use manual OpenSSL paths" OFF) +if(USE_MANUAL_OPENSSL) + if(NOT DEFINED OPENSSL_INCLUDE_DIR OR NOT DEFINED OPENSSL_CRYPTO_LIBRARY) + message(FATAL_ERROR "Manual OpenSSL mode requires OPENSSL_INCLUDE_DIR and OPENSSL_CRYPTO_LIBRARY") + endif() + set(OpenSSL_INCLUDE_DIRS "${OPENSSL_INCLUDE_DIR}") + set(OpenSSL_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}") + if(DEFINED OPENSSL_SSL_LIBRARY) + list(APPEND OpenSSL_LIBRARIES "${OPENSSL_SSL_LIBRARY}") + endif() +else() + find_package(OpenSSL REQUIRED) + set(OpenSSL_INCLUDE_DIRS "${OPENSSL_INCLUDE_DIR}") + set(OpenSSL_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}") +endif() + +# --- OpenCV --- +option(USE_MANUAL_OPENCV "Use manual OpenCV paths" OFF) +if(USE_MANUAL_OPENCV) + if(NOT DEFINED OpenCV_INCLUDE_DIR OR NOT DEFINED OpenCV_LIBRARIES) + message(FATAL_ERROR "Manual OpenCV mode requires OpenCV_INCLUDE_DIR and OpenCV_LIBRARIES") + endif() + set(OpenCV_INCLUDE_DIRS "${OpenCV_INCLUDE_DIR}") + set(OpenCV_LIBS "${OpenCV_LIBRARIES}") +else() + find_package(OpenCV REQUIRED) + set(OpenCV_INCLUDE_DIRS "${OpenCV_INCLUDE_DIRS}") + set(OpenCV_LIBS "${OpenCV_LIBRARIES}") +endif() + +# --- PcapPlusPlus --- +option(USE_MANUAL_PCAPPLUSPLUS "Use manual PcapPlusPlus paths" OFF) +if(USE_MANUAL_PCAPPLUSPLUS) + if(NOT DEFINED PcapPlusPlus_INCLUDE_DIRS OR NOT DEFINED PcapPlusPlus_LIBRARIES) + message(FATAL_ERROR "Manual PcapPlusPlus mode requires PcapPlusPlus_INCLUDE_DIRS and PcapPlusPlus_LIBRARIES") + endif() + # Append required external dependencies manually + list(APPEND PcapPlusPlus_LIBRARIES pcap pthread) +else() + find_package(PcapPlusPlus REQUIRED) +endif() + +# === 4. User project headers === +include_directories( + ${CMAKE_SOURCE_DIR}/assets + ${CMAKE_SOURCE_DIR}/plugins + ${CMAKE_SOURCE_DIR}/images + ${CMAKE_SOURCE_DIR}/layers +) + +# === 5. Executables === +add_executable(heiFIP cli.cpp) +add_executable(main main.cpp) +# main2.cpp removed + +# === 6. Apply includes and libraries === +foreach(_target IN ITEMS heiFIP main) + target_include_directories(${_target} PUBLIC + ${PcapPlusPlus_INCLUDE_DIRS} + ${OpenSSL_INCLUDE_DIRS} + ${OpenCV_INCLUDE_DIRS} + ) + target_link_libraries(${_target} PUBLIC + ${OpenSSL_LIBRARIES} + ${PcapPlusPlus_LIBRARIES} + ${OpenCV_LIBS} + ) +endforeach() + +# === 7. Optimization and LTO in Release builds === +foreach(_target IN ITEMS heiFIP main) + target_compile_options(${_target} PUBLIC + $<$:-O3 -march=native -funroll-loops -flto=auto -DNDEBUG> + ) + target_link_options(${_target} PUBLIC + $<$:-flto=auto> + ) +endforeach() \ No newline at end of file diff --git a/heiFIP/assets/PcapHeaders.h b/heiFIP/assets/PcapHeaders.h new file mode 100644 index 0000000..9ca148c --- /dev/null +++ b/heiFIP/assets/PcapHeaders.h @@ -0,0 +1,54 @@ +#pragma once + +#include + +/** + * @struct PcapGlobalHeader + * @brief Represents the 24-byte global header at the start of a PCAP file. + * + * Fields (in file byte order, typically little-endian): + * magic_number : 4 bytes + * • Identifies the file as a PCAP. Common value 0xa1b2c3d4 (nanosecond‐resolution variants differ). + * version_major : 2 bytes + * • Major version of the libpcap file format (e.g., 2). + * version_minor : 2 bytes + * • Minor version of the libpcap file format (e.g., 4). + * thiszone : 4 bytes (signed) + * • Offset from UTC in seconds (usually 0). Historically used for timestamp adjustment. 
+ * sigfigs : 4 bytes + * • Timestamp accuracy; typically set to 0 (no accuracy information). + * snaplen : 4 bytes + * • “Snapshot length” or maximum number of bytes captured per packet. Packets larger than this are truncated. + * network : 4 bytes + * • Data link type (DLT) identifier, e.g., 1 for Ethernet. Determines how to interpret raw packet headers. + */ +struct PcapGlobalHeader { + uint32_t magic_number; // File format identifier: 0xa1b2c3d4 (or swapped/endian variants) + uint16_t version_major; // Major version number (e.g., 2) + uint16_t version_minor; // Minor version number (e.g., 4) + int32_t thiszone; // GMT to local time correction (in seconds; usually 0) + uint32_t sigfigs; // Accuracy of timestamps (in microseconds; typically 0) + uint32_t snaplen; // Max length of captured packets, in bytes + uint32_t network; // Data link type (e.g., 1 = Ethernet) +}; + +/** + * @struct PcapPacketHeader + * @brief Represents the 16-byte per-packet header for each packet in a PCAP file. + * + * Fields (in file byte order, typically little-endian): + * ts_sec : 4 bytes + * • Timestamp, seconds portion, when the packet was captured. + * ts_usec : 4 bytes + * • Timestamp, microseconds portion (0–999999) for finer granularity. + * caplen : 4 bytes + * • Number of bytes of packet data actually saved in the file (may be ≤ original length). + * len : 4 bytes + * • Original length of the packet on the wire (before any truncation). + */ +struct PcapPacketHeader { + uint32_t ts_sec; // Timestamp: seconds since Epoch (Unix time) + uint32_t ts_usec; // Timestamp: microseconds past ts_sec + uint32_t caplen; // Captured length (number of bytes written to file) + uint32_t len; // Original packet length (on-the-wire size) +}; \ No newline at end of file diff --git a/heiFIP/assets/heiFIPPacketImage.cpp b/heiFIP/assets/heiFIPPacketImage.cpp new file mode 100644 index 0000000..d0059fa --- /dev/null +++ b/heiFIP/assets/heiFIPPacketImage.cpp @@ -0,0 +1,306 @@ +#pragma once + +#include +#include +#include "PcapHeaders.h" // Provides PcapPacketHeader for captured length +#include +#include +#include +#include +#include + +/** + * @class heiFIPPacketImage + * @brief Base class for converting raw packet byte data into a 2D matrix representation. + * + * Responsibilities: + * - Store raw packet bytes (std::vector) and captured length (_cap_length). + * - Offer multiple constructors for different initialization styles: + * • Direct data + cap_length. + * • Data only (read cap_length from PcapPacketHeader). + * • Data + image-dimension parameters (dim, fill, auto_dim), which immediately + * build a square “tiled” matrix and store both the matrix and a “binaries” copy. + * - Provide utilities: + * • printHexData(): Print packet bytes in hex for debugging. + * • getHexData(): Retrieve raw bytes as a vector. + * • bit_array(): Convert each byte to its 8-bit binary representation, then pack into 4-bit nibbles. + * • get_matrix_tiled(): Build a dim×dim grayscale matrix from raw bytes, with padding/truncation. + * - Provide getters/setters for data, cap_length, and the computed matrix. + */ +class heiFIPPacketImage { +public: + /** + * @brief Constructor: initialize with raw byte data and explicit captured length. + * + * @param data Vector of raw packet bytes (0–255). + * @param cap_length The “caplen” field from the pcap header indicating how many bytes were captured. + * + * Why: + * - Some callers know the cap_length in advance; this constructor lets them set both fields directly. 
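+ *
+ * Example (illustrative sketch only; the byte values are made up):
+ * @code
+ *   std::vector<uint8_t> bytes = {0x45, 0x00, 0x00, 0x3c};
+ *   heiFIPPacketImage img(bytes, static_cast<uint32_t>(bytes.size()));
+ *   img.printHexData();  // prints the four bytes in hex
+ * @endcode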
+ */ + heiFIPPacketImage(std::vector data, uint32_t cap_length) + : _data(std::move(data)), _cap_length(cap_length) + {} + + /** + * @brief Constructor: initialize with raw byte data only, reading cap_length from a PcapPacketHeader. + * + * @param data Vector of raw packet bytes. + * + * Workflow: + * 1. Store input bytes in _data. + * 2. Instantiate a PcapPacketHeader (uninitialized), then read its caplen member. + * This assumes that PcapPacketHeader() will auto-populate caplen appropriately (e.g., via global state). + * 3. Store caplen in _cap_length. + * + * Why: + * - In contexts where cap_length comes from a shared or externally managed PcapPacketHeader, + * callers need only supply the byte array; the header’s caplen is fetched internally. + */ + heiFIPPacketImage(std::vector data) + : _data(std::move(data)) + { + PcapPacketHeader packetHeader; + _cap_length = packetHeader.caplen; + } + + /** + * @brief Constructor: initialize with raw byte data and immediately build a tiled image matrix. + * + * @param data Vector of raw packet bytes. + * @param dim Target dimension of the square output image (width = height = dim). + * If auto_dim is true, the actual dimension is computed as ceil(sqrt(length)). + * @param fill Fill value (0–255) used to pad if the flattened data is smaller than dim×dim. + * @param auto_dim If true, ignore provided dim and compute dim = ceil(sqrt(length of data)). + * + * Workflow: + * 1. Store input bytes in _data. + * 2. Instantiate a PcapPacketHeader to fetch cap_length (same as data-only constructor). + * 3. Call get_matrix_tiled(fill, dim, auto_dim), which returns: + * • result.first = 2D matrix (dim×dim) of uint8_t values (padded/truncated). + * • result.second = “binaries” vector-of-vectors, here just a single row of raw data. + * 4. Store result.first in matrix member and result.second in binaries member. + * + * Why: + * - Some callers want to immediately get a matrix representation upon construction, + * so this constructor does that in one step, storing both the matrix and raw-binary copy. + */ + heiFIPPacketImage(std::vector data, int dim, int fill, bool auto_dim) + : _data(std::move(data)) + { + PcapPacketHeader packetHeader; + _cap_length = packetHeader.caplen; + + // Build the tiled matrix and binaries representation in one call. + auto result = heiFIPPacketImage::get_matrix_tiled(fill, dim, auto_dim); + heiFIPPacketImage::matrix = std::move(result.first); + heiFIPPacketImage::binaries = std::move(result.second); + } + + ~heiFIPPacketImage() = default; + + /** + * @brief Print the raw packet bytes in hexadecimal to stdout for debugging. + * + * Output format: + * “Packet has size (Size: bytes):” + * Then each byte printed in “HH ” (two-digit hex, space-separated). + */ + void printHexData() const { + std::cout << std::dec + << "Packet has size" + << " (Size: " << get_cap_length() << " bytes):\n"; + for (size_t i = 0; i < _data.size(); ++i) { + std::cout << std::hex + << std::setw(2) << std::setfill('0') + << static_cast(_data[i]) << " "; + } + std::cout << std::endl; + } + + /** + * @brief Return a copy of the raw packet bytes as a vector. + * + * @return std::vector Each element is one byte from _data. + * + * Why: + * - Some image classes need a direct copy of the packet bytes. + * - Ensures callers cannot modify the original _data member. 
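+ *
+ * Example (sketch; assumes a non-empty, already constructed image `img`):
+ * @code
+ *   std::vector<uint8_t> copy = img.getHexData();
+ *   copy[0] = 0xFF;  // only the copy changes; img.get_data() still returns the original bytes
+ * @endcode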
+ */ + std::vector getHexData() const { + std::vector hexData; + hexData.reserve(_data.size()); + for (size_t i = 0; i < _data.size(); ++i) { + hexData.push_back(_data[i]); + } + return hexData; + } + + /** + * @brief Convert raw bytes to a 4-bit–granularity “bit array.” + * + * Workflow: + * 1. Copy each byte from _data into a local vector called data. + * 2. For each byte, produce an 8-character string of ‘0’/‘1’ bits (std::bitset<8>). + * 3. Concatenate all these bit strings into one long string “bytes_as_bits.” + * 4. Walk through bytes_as_bits in 4-bit chunks; each chunk is interpreted as a binary number + * in range 0–15, then appended to transition vector. + * 5. Return transition, a vector of size ceil((8 * _data.size()) / 4). + * + * Why: + * - Some image formats (e.g., certain Markov or n-gram matrices) operate on 4-bit “nibble” values. + * - Converting each byte into two 4-bit values allows constructing those images. + */ + std::vector bit_array() const { + // 1) Copy bytes so as not to modify _data + std::vector data; + data.reserve(_data.size()); + for (uint8_t byte : _data) { + data.push_back(byte); + } + + // 2) Build a concatenated string of bits, 8 bits per byte + std::string bytes_as_bits; + bytes_as_bits.reserve(data.size() * 8); + for (unsigned char byte : data) { + bytes_as_bits += std::bitset<8>(byte).to_string(); + } + + // 3) Group into 4-bit chunks and convert to byte values 0–15 + std::vector transition; + transition.reserve((bytes_as_bits.size() + 3) / 4); + for (size_t i = 0; i < bytes_as_bits.length(); i += 4) { + // If remaining bits < 4 at the end, substring still works (std::stoi will parse up to end) + transition.push_back( + static_cast( + std::stoi(bytes_as_bits.substr(i, 4), nullptr, 2) + ) + ); + } + return transition; + } + + /** + * @brief Build a square “tiled” matrix (dim × dim) from raw bytes, with padding or truncation. + * + * @param fill Value (0–255) to pad matrix cells if flattened data is shorter than dim². + * @param dim Desired dimension of the square output matrix (width = height = dim). + * @param auto_dim If true, compute dim = ceil( sqrt(max(binaries[i].size())) ) before flattening. + * + * Workflow: + * 1. Create a single-element vector-of-vectors called binaries, containing one row: hexData = getHexData(). + * 2. Determine length = max row length in binaries (here, just hexData.size()). + * 3. If auto_dim is true, recompute dim = ceil(sqrt(length)). + * 4. Compute total = dim × dim. + * 5. Flatten binaries into one 1D vector “flat” (binaries only has one row here, but code is generic). + * 6. If flat.size() < total, append (total − flat.size()) copies of fill. + * 7. Else if flat.size() > total, truncate flat to size = total. + * 8. Allocate result as vector>(dim, vector(dim)). + * 9. Fill result[i][j] sequentially from flat[k], where i = k / dim, j = k % dim. + * 10. Return a pair: { result, binaries }. + * + * Returns: + * - first: dim×dim matrix of uint8_t + * - second: original “binaries” row(s) used (here, just hexData). + * + * Why: + * - Many image types represent packet bytes as a square grayscale image, padding/truncating as needed. + * - The “binaries” return value allows higher layers to also inspect the raw vector(s) of bytes. 
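+ *
+ * Worked example (sketch): for a 10-byte packet with auto_dim = true,
+ * dim becomes ceil(sqrt(10)) = 4 and total = 16, so the 10 bytes fill the
+ * matrix row-major and the remaining 6 cells are padded with the fill value.
+ * @code
+ *   heiFIPPacketImage img(rawBytes, static_cast<uint32_t>(rawBytes.size()));  // rawBytes.size() == 10
+ *   auto result = img.get_matrix_tiled(0, 0, true);  // fill = 0, dim ignored, auto_dim = true
+ *   // result.first is a 4x4 matrix; result.second holds the original bytes as one row
+ * @endcode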
+ */ + std::pair>, std::vector>> + get_matrix_tiled(int fill, int dim, bool auto_dim) { + // 1) Build “binaries” as a vector of one row (hexData) + std::vector> binaries; + std::vector hexData = getHexData(); + binaries.push_back(hexData); + + // 2) Determine maximum row length in binaries (only one row here) + size_t length = 0; + for (const auto& b : binaries) { + if (b.size() > length) { + length = b.size(); + } + } + + // 3) If auto_dim is requested, compute dim = ceil(sqrt(length)) + if (auto_dim) { + dim = static_cast(std::ceil(std::sqrt(static_cast(length)))); + } + + int total = dim * dim; + + // 4) Flatten binaries into a single 1D array “flat” + std::vector flat; + flat.reserve(total); + for (const auto& row : binaries) { + flat.insert(flat.end(), row.begin(), row.end()); + } + + // 5) Pad with “fill” if too short + if (flat.size() < static_cast(total)) { + flat.insert(flat.end(), total - flat.size(), static_cast(fill)); + } + // 6) Truncate if too long + else if (flat.size() > static_cast(total)) { + flat.resize(total); + } + + // 7) Reshape into a 2D dim×dim matrix + std::vector> result(dim, std::vector(dim)); + for (size_t idx = 0; idx < static_cast(total); ++idx) { + size_t i = idx / dim; + size_t j = idx % dim; + result[i][j] = flat[idx]; + } + + return { result, binaries }; + } + + /** + * @brief Get a copy of the raw packet data vector. + * @return std::vector Copy of _data. + */ + std::vector get_data() const { + return _data; + } + + /** + * @brief Replace the raw packet data. + * @param data New vector of raw bytes. + */ + void set_data(std::vector data) { + _data = std::move(data); + } + + /** + * @brief Get the captured length (caplen) of this packet. + * @return uint32_t The stored captured length. + */ + uint32_t get_cap_length() const { + return _cap_length; + } + + /** + * @brief Set the captured length (caplen). + * @param cap_length New captured length value. + */ + void set_cap_length(uint32_t cap_length) { + _cap_length = cap_length; + } + + /** + * @brief Return a reference to the stored 2D matrix. + * @return std::vector>& The dim×dim matrix built by a tiled constructor. + * + * Note: If get_matrix_tiled() was never called, matrix may be empty. + */ + const std::vector>& get_matrix() const { + return matrix; + } + +private: + std::vector _data; ///< Raw bytes of the packet + uint32_t _cap_length; ///< Captured length from pcap header + std::vector> binaries; ///< Original binaries as rows (usually one row of raw bytes) + std::vector> matrix; ///< Tiled dim×dim matrix representation of packet bytes +}; \ No newline at end of file diff --git a/heiFIP/cli.cpp b/heiFIP/cli.cpp new file mode 100644 index 0000000..a975eff --- /dev/null +++ b/heiFIP/cli.cpp @@ -0,0 +1,157 @@ +#include +#include +#include +#include + +#include "extractor.cpp" +#include "runner.cpp" + +/// @brief Prints usage/help information for the CLI tool. 
+void print_usage(const char* progName) { + std::cout << "Usage: " << progName << " [options]\n" + << " -i, --input FILE input pcap file path\n" + << " -o, --output DIR output directory\n" + << " -t, --threads N number of threads (default 1)\n" + << " -p, --processor TYPE preprocessing type: NONE or HEADER\n" + << " -m, --mode MODE image type: FlowImage, FlowImageTiledFixed, FlowImageTiledAuto,\n" + << " MarkovTransitionMatrixFlow, MarkovTransitionMatrixPacket, PacketImage\n" + << " --dim N image dimension\n" + << " --fill N fill value for missing data\n" + << " --cols N number of columns (used in some modes)\n" + << " --auto-dim enable auto-dimension (FlowImageTiledAuto, etc.)\n" + << " --append append mode for FlowImage\n" + << " --min-dim N minimum image dimension\n" + << " --max-dim N maximum image dimension\n" + << " --min-pkts N minimum packets per flow\n" + << " --max-pkts N maximum packets per flow\n" + << " --remove-dup remove duplicate packets/flows\n" + << " --name name of processed image\n " + << " -h, --help display this help and exit\n"; +} + +int main(int argc, char* argv[]) { + // CLI parameter variables + std::string input_file; + std::string output_dir; + int thread_count = 1; + PacketProcessorType proc_type = PacketProcessorType::NONE; + ImageType img_type = ImageType::PacketImage; + + // Optional parameters with defaults + std::string image_name = "heiFIPGeneratedImage"; + size_t dim = 0, fill = 0, cols = 0; + bool auto_dim = false, append = false; + size_t min_dim = 0, max_dim = 0; + size_t min_pkts = 0, max_pkts = 0; + bool remove_dup = false; + + // Long options for getopt + static struct option long_opts[] = { + {"name", required_argument, 0, 0 }, + {"input", required_argument, 0, 'i'}, + {"output", required_argument, 0, 'o'}, + {"threads", required_argument, 0, 't'}, + {"processor", required_argument, 0, 'p'}, + {"mode", required_argument, 0, 'm'}, + {"dim", required_argument, 0, 0 }, + {"fill", required_argument, 0, 0 }, + {"cols", required_argument, 0, 0 }, + {"auto-dim", no_argument, 0, 0 }, + {"append", no_argument, 0, 0 }, + {"min-dim", required_argument, 0, 0 }, + {"max-dim", required_argument, 0, 0 }, + {"min-pkts", required_argument, 0, 0 }, + {"max-pkts", required_argument, 0, 0 }, + {"remove-dup", no_argument, 0, 0 }, + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0} + }; + + // Parse command-line arguments + int opt; + int long_index = 0; + while ((opt = getopt_long(argc, argv, "i:o:t:p:m:h", long_opts, &long_index)) != -1) { + switch (opt) { + case 'i': input_file = optarg; break; + case 'o': output_dir = optarg; break; + case 't': thread_count = std::stoi(optarg); break; + case 'p': + if (std::string(optarg) == "NONE") proc_type = PacketProcessorType::NONE; + else if (std::string(optarg) == "HEADER") proc_type = PacketProcessorType::HEADER; + else { std::cerr << "Unknown processor type\n"; return 1; } + break; + case 'm': + if (std::string(optarg) == "PacketImage") img_type = ImageType::PacketImage; + else if (std::string(optarg) == "FlowImage") img_type = ImageType::FlowImage; + else if (std::string(optarg) == "FlowImageTiledFixed") img_type = ImageType::FlowImageTiledFixed; + else if (std::string(optarg) == "FlowImageTiledAuto") img_type = ImageType::FlowImageTiledAuto; + else if (std::string(optarg) == "MarkovFlow") img_type = ImageType::MarkovTransitionMatrixFlow; + else if (std::string(optarg) == "MarkovPacket") img_type = ImageType::MarkovTransitionMatrixPacket; + else { std::cerr << "Unknown mode\n"; return 1; } + break; + case 0: + if 
(strcmp(long_opts[long_index].name, "dim") == 0) dim = std::stoi(optarg); + else if (strcmp(long_opts[long_index].name, "fill") == 0) fill = std::stoi(optarg); + else if (strcmp(long_opts[long_index].name, "cols") == 0) cols = std::stoi(optarg); + else if (strcmp(long_opts[long_index].name, "auto-dim") == 0) auto_dim = true; + else if (strcmp(long_opts[long_index].name, "append") == 0) append = true; + else if (strcmp(long_opts[long_index].name, "min-dim") == 0) min_dim = std::stoi(optarg); + else if (strcmp(long_opts[long_index].name, "max-dim") == 0) max_dim = std::stoi(optarg); + else if (strcmp(long_opts[long_index].name, "min-pkts") == 0) min_pkts = std::stoi(optarg); + else if (strcmp(long_opts[long_index].name, "max-pkts") == 0) max_pkts = std::stoi(optarg); + else if (strcmp(long_opts[long_index].name, "remove-dup") == 0) remove_dup = true; + else if (strcmp(long_opts[long_index].name, "name") == 0) image_name = optarg; + break; + case 'h': print_usage(argv[0]); return 0; + default: print_usage(argv[0]); return 1; + } + } + + // Input and output are required + if (input_file.empty() || output_dir.empty()) { + print_usage(argv[0]); + return 1; + } + + Runner runner(thread_count); // Create runner with specified thread count + + // Select argument type based on image type + ImageArgsVariant args; + switch (img_type) { + case ImageType::FlowImage: + args = FlowImageArgs{dim, append, fill}; + break; + case ImageType::FlowImageTiledFixed: + args = FlowImageTiledFixedArgs{dim, fill, cols}; + break; + case ImageType::FlowImageTiledAuto: + args = FlowImageTiledAutoArgs{dim, fill, auto_dim}; + break; + case ImageType::MarkovTransitionMatrixFlow: + args = MarkovTransitionMatrixFlowArgs{cols}; + break; + case ImageType::MarkovTransitionMatrixPacket: + args = MarkovTransitionMatrixPacketArgs{}; + break; + case ImageType::PacketImage: + args = PacketImageArgs{dim, auto_dim, fill}; + break; + } + + // Main image generation call using the configured arguments + runner.create_image( + image_name, // Output image name + input_file, // Input `.pcap` file + output_dir, // Output directory + args, // Variant holding image arguments + proc_type, // Packet preprocessing strategy + img_type, // Image generation mode + min_dim, // Minimum image dimension + max_dim, // Maximum image dimension + min_pkts, // Minimum packets per flow + max_pkts, // Maximum packets per flow + remove_dup // Whether to remove duplicate flows + ); + + return 0; +} \ No newline at end of file diff --git a/heiFIP/cmake/FindOpenSSL.cmake b/heiFIP/cmake/FindOpenSSL.cmake new file mode 100644 index 0000000..6455dc7 --- /dev/null +++ b/heiFIP/cmake/FindOpenSSL.cmake @@ -0,0 +1,48 @@ +# --- cmake/Modules/FindOpenSSL.cmake --- + +# 1) Locate the directory that contains openssl/sha.h +find_path(OPENSSL_INCLUDE_DIR + NAMES openssl/sha.h + PATHS + $ENV{OPENSSL_ROOT_DIR}/include # if user set OPENSSL_ROOT_DIR + /opt/homebrew/Cellar/openssl@3/*/include # glob into all versions + /opt/homebrew/include # Homebrew “flat” symlink + /usr/local/include + /usr/include +) + +# 2) Locate the libraries (unchanged) +find_library(OPENSSL_CRYPTO_LIBRARY + NAMES crypto + PATHS + $ENV{OPENSSL_ROOT_DIR}/lib + /opt/homebrew/Cellar/openssl@3/*/lib + /opt/homebrew/lib + /usr/local/lib + /usr/lib +) +find_library(OPENSSL_SSL_LIBRARY + NAMES ssl + PATHS + $ENV{OPENSSL_ROOT_DIR}/lib + /opt/homebrew/Cellar/openssl@3/*/lib + /opt/homebrew/lib + /usr/local/lib + /usr/lib +) + +# 3) Standard boilerplate +include(FindPackageHandleStandardArgs) 
+find_package_handle_standard_args(OpenSSL + REQUIRED_VARS OPENSSL_INCLUDE_DIR + OPENSSL_CRYPTO_LIBRARY + OPENSSL_SSL_LIBRARY +) + +if(OpenSSL_FOUND) + set(OpenSSL_INCLUDE_DIRS "${OPENSSL_INCLUDE_DIR}") + set(OpenSSL_LIBRARIES + "${OPENSSL_SSL_LIBRARY}" + "${OPENSSL_CRYPTO_LIBRARY}" + ) +endif() \ No newline at end of file diff --git a/heiFIP/cmake/FindPcapPlusPlus.cmake b/heiFIP/cmake/FindPcapPlusPlus.cmake new file mode 100644 index 0000000..4552a4e --- /dev/null +++ b/heiFIP/cmake/FindPcapPlusPlus.cmake @@ -0,0 +1,70 @@ +# cmake/Modules/FindPcapPlusPlus.cmake + +# 1) Locate the headers +# --- find the raw include dir (may end up being /opt/homebrew/include) --- +find_path(PPCPP_INCLUDE_DIR + NAMES SystemUtils.h + PATHS + $ENV{PCAPPLUSPLUS_ROOT}/include/pcapplusplus # direct Cellar path + /opt/homebrew/include/pcapplusplus # Homebrew symlink + HINTS + $ENV{PCAPPLUSPLUS_ROOT} + /usr/local + /usr +) + +# --- if CMake only found the parent 'include' directory, but +# the real headers are under include/pcapplusplus, fix it up --- +if(PPCPP_INCLUDE_DIR) + # e.g. PPCPP_INCLUDE_DIR = /opt/homebrew/include + if(NOT EXISTS "${PPCPP_INCLUDE_DIR}/SystemUtils.h" + AND EXISTS "${PPCPP_INCLUDE_DIR}/pcapplusplus/SystemUtils.h") + message(STATUS " >> Adjusting Ppcpp include dir to subfolder ‘pcapplusplus/’") + set(PPCPP_INCLUDE_DIR "${PPCPP_INCLUDE_DIR}/pcapplusplus") + endif() +endif() + +# 2) Locate the libraries +find_library(PPCPP_COMMONPP_LIB + NAMES Common++ + HINTS + ENV PCAPPLUSPLUS_ROOT + /usr/local/lib + /opt/homebrew/lib + /usr/lib +) +find_library(PPCPP_PACKETPP_LIB + NAMES Packet++ + HINTS + ENV PCAPPLUSPLUS_ROOT + /usr/local/lib + /opt/homebrew/lib + /usr/lib +) +find_library(PPCPP_PCAPPP_LIB + NAMES Pcap++ + HINTS + ENV PCAPPLUSPLUS_ROOT + /usr/local/lib + /opt/homebrew/lib + /usr/lib +) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(PcapPlusPlus + REQUIRED_VARS PPCPP_INCLUDE_DIR + PPCPP_COMMONPP_LIB + PPCPP_PACKETPP_LIB + PPCPP_PCAPPP_LIB +) + +if(PcapPlusPlus_FOUND) + set(PcapPlusPlus_INCLUDE_DIRS "${PPCPP_INCLUDE_DIR}") + set(PcapPlusPlus_LIBRARIES + ${PPCPP_COMMONPP_LIB} + ${PPCPP_PACKETPP_LIB} + ${PPCPP_PCAPPP_LIB} + pcap + pthread + ) +endif() \ No newline at end of file diff --git a/heiFIP/extractor.cpp b/heiFIP/extractor.cpp new file mode 100644 index 0000000..c493144 --- /dev/null +++ b/heiFIP/extractor.cpp @@ -0,0 +1,539 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "init.cpp" +#include "flow.cpp" +#include "flow_tiled_auto.cpp" +#include "flow_tiled_fixed.cpp" +#include "markov_chain.cpp" +#include "heiFIPPacketImage.cpp" + + +/** + * @struct FlowImageArgs + * @brief Parameters for creating a simple flow-based image. + * @param dim The target dimension (width and height) for the square output image. + * @param append If true, append new flow data to existing rows/columns rather than overwriting. + * @param fill Fill value to use when a flow has fewer packets than `dim` (padding). + */ +struct FlowImageArgs { + size_t dim; + bool append; + size_t fill; +}; + +/** + * @struct FlowImageTiledFixedArgs + * @brief Parameters for creating a tiled flow image with a fixed number of columns. + * @param dim The dimension (width and height) of each tile (sub-image). + * @param fill Fill value used to pad tiles that have fewer packets than `dim * dim`. + * @param cols The number of columns of tiles to arrange horizontally. 
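+ *
+ * Example (sketch): 16x16 tiles padded with zeros, four tiles per row, wrapped
+ * in the ImageArgsVariant that the extractor expects:
+ * @code
+ *   ImageArgsVariant args = FlowImageTiledFixedArgs{16, 0, 4};  // dim, fill, cols
+ * @endcode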
+ */ +struct FlowImageTiledFixedArgs { + size_t dim; + size_t fill; + size_t cols; +}; + +/** + * @struct FlowImageTiledAutoArgs + * @brief Parameters for creating a tiled flow image where the number of tiles per row is determined automatically. + * @param dim The approximate dimension (width/height) of each tile. + * @param fill Fill value for padding each tile. + * @param auto_dim If true, adapt the actual tile size at runtime based on packet count and other heuristics. + */ +struct FlowImageTiledAutoArgs { + size_t dim; + size_t fill; + bool auto_dim; +}; + +/** + * @struct PacketImageArgs + * @brief Parameters for creating an image out of raw packet bytes (one image per packet). + * @param dim The dimension (width/height) of the output packet image. + * @param auto_dim If true, allow the image to grow/shrink based on packet length (otherwise force `dim x dim`). + * @param fill Fill value to pad packet data if it is shorter than `dim * dim`. + */ +struct PacketImageArgs { + size_t dim; + bool auto_dim; + size_t fill; +}; + +/** + * @struct MarkovTransitionMatrixFlowArgs + * @brief Parameters for creating a flow-level Markov transition matrix image. + * @param cols The number of columns (and rows) in the square transition matrix (state space size). + */ +struct MarkovTransitionMatrixFlowArgs { + size_t cols; +}; + +/** + * @struct MarkovTransitionMatrixPacketArgs + * @brief No parameters needed for packet‐level Markov transition matrix (state space inferred from packet features). + */ +struct MarkovTransitionMatrixPacketArgs {}; + +/** + * @typedef ImageArgsVariant + * @brief A std::variant that can hold any of the argument structures above, or std::monostate if not initialized. + * + * Usage: use std::get(args) once you know which ImageType you are generating. + */ +using ImageArgsVariant = std::variant< + std::monostate, + FlowImageArgs, + FlowImageTiledFixedArgs, + FlowImageTiledAutoArgs, + PacketImageArgs, + MarkovTransitionMatrixFlowArgs, + MarkovTransitionMatrixPacketArgs +>; + +/** + * @typedef UInt8Matrix + * @brief A 3D vector representing one or more grayscale images. + * Dimensions: [num_images][height][width], where each pixel is a uint8_t (0–255). + */ +using UInt8Matrix = std::vector>>; + +// This concept checks on thing on ImgType: +// 1) `image.get_matrix()` must be valid and return something convertible to +// const std::vector>& +template +concept IsFlowImage = requires(const ImgType& image) { + // Require `get_matrix() -> std::vector>&` + { image.get_matrix() } -> std::convertible_to>&>; +}; + +/** + * @enum ImageType + * @brief Enumeration of supported image‐generation modes. + * + * - FlowImage: One image per entire flow, packets arranged sequentially. + * - FlowImageTiledFixed: Splits each flow into fixed-size tiles and arranges them in a grid. + * - FlowImageTiledAuto: Similar to tiled fixed, but determines tile layout dynamically. + * - PacketImage: One image per packet, each packet’s raw bytes laid out row‐major. + * - MarkovTransitionMatrixFlow: Build a transition matrix between flow states (e.g., protocol flags). + * - MarkovTransitionMatrixPacket: Build a transition matrix between packet‐level states (e.g., byte patterns). + */ +enum class ImageType { + FlowImage, + FlowImageTiledFixed, + FlowImageTiledAuto, + PacketImage, + MarkovTransitionMatrixFlow, + MarkovTransitionMatrixPacket +}; + +/** + * @class FIPExtractor + * @brief Coordinates reading pcap data, preprocessing, creating various image formats, and saving results. + * + * Responsibilities: + * 1. 
Read packets from a file or in-memory list via PacketProcessor. + * 2. Convert packet/flow data into one of several image types (FlowImage, PacketImage, etc.). + * 3. Validate image dimensions and optionally suppress duplicates. + * 4. Save the generated grayscale image(s) to disk as PNG. + */ +class FIPExtractor { +public: + /** + * @brief Verify that an image matrix meets size constraints and (optionally) isn’t a duplicate. + * + * @tparam ImgType A type providing: + * size() → number of rows (height), + * operator.size() → number of columns (width), + * data() → raw pointer or contiguous data buffer, + * dataSize() → total number of bytes. + * @param image The 2D (or 3D) matrix returned by ImgType::get_matrix(). + * @param minImageDim Minimum allowed dimension (height or width). Reject if smaller. + * @param maxImageDim Maximum allowed dimension (height or width). Reject if larger; zero → no limit. + * @param removeDuplicates If true, compare this image’s raw bytes to a set of previously created images, + * and reject if it already exists. (Currently commented out; future feature.) + * @return true if image passes all checks, false otherwise. + */ + + template + bool verify(const ImgType& image, + size_t minImageDim, + size_t maxImageDim, + bool removeDuplicates) + { + size_t height = image.get_matrix().size(); + size_t width = image.get_matrix()[0].size(); + + // Enforce minimum dimension constraint: + if (height < minImageDim || width < minImageDim) { + std::cout << "[!] Image not created: dimensions smaller than minimum (" + << minImageDim << ").\n"; + return false; + } + + // Enforce maximum dimension constraint (if nonzero): + if (maxImageDim != 0 && (height > maxImageDim || width > maxImageDim)) { + std::cout << "[!] Image not created: dimensions exceed maximum (" + << maxImageDim << ").\n"; + return false; + } + + if (removeDuplicates) { + std::vector> matrix = image.get_matrix(); + if (imagesCreatedSet.count(matrix)) { + std::cout << "[!] Image not created: duplicate detected.\n"; + return false; + } + imagesCreatedSet.insert({matrix, true}); + } + + return true; + } + + /** + * @brief Default constructor initializes internal PacketProcessor. + */ + FIPExtractor() + : processor() + {} + + /** + * @brief Read packets from a pcap file, preprocess, convert to image(s), and return as matrices. + * + * @param input_file Path to the .pcap file. Must exist on disk. + * @param args Variant containing the specific parameters for the chosen ImageType. + * @param preprocessing_type NONE or HEADER: whether to strip non-header bytes, etc. + * @param image_type Which type of image(s) to create (see ImageType enum). + * @param min_image_dim Minimum image dimension; images smaller will be discarded. + * @param max_image_dim Maximum image dimension; images larger will be discarded. + * @param min_packets_per_flow Minimum packet count for a flow to produce an image (only relevant to flow modes). + * @param max_packets_per_flow Maximum packet count per flow; extra packets are dropped. + * @param remove_duplicates If true, drop identical packets/flows during preprocessing. + * @return UInt8Matrix A vector of 2D matrices ([num_images][height][width]) ready for saving. + * @throws std::runtime_error if input_file doesn’t exist or args aren’t initialized. 
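+ *
+ * Usage sketch (the pcap path is a placeholder):
+ * @code
+ *   FIPExtractor extractor;
+ *   ImageArgsVariant args = FlowImageArgs{128, false, 0};  // dim, append, fill
+ *   UInt8Matrix images = extractor.createImageFromFile(
+ *       "capture.pcap", args,
+ *       PacketProcessorType::HEADER, ImageType::FlowImage);
+ * @endcode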
+ */ + UInt8Matrix createImageFromFile( + const std::string& input_file, + const ImageArgsVariant& args, + PacketProcessorType preprocessing_type = PacketProcessorType::NONE, + ImageType image_type = ImageType::PacketImage, + int min_image_dim = 0, + int max_image_dim = 0, + int min_packets_per_flow = 0, + int max_packets_per_flow = 0, + bool remove_duplicates = false + ) { + // Verify existence of the pcap file before proceeding: + if (!std::filesystem::exists(input_file)) { + throw std::runtime_error("Input file does not exist: " + input_file); + } + + // Read and preprocess packets from the file: + // - If remove_duplicates is true, duplicates are dropped here. + // - If max_packets_per_flow > 0, stop reading after that many packets. + std::vector> processed_packets = + processor.readPacketsFile( + input_file, + preprocessing_type, + remove_duplicates, + max_packets_per_flow + ); + + // Delegate to createMatrix, passing along preprocessing/filtering criteria + return createMatrix( + processed_packets, + preprocessing_type, + image_type, + min_image_dim, + max_image_dim, + min_packets_per_flow, + max_packets_per_flow, + remove_duplicates, + args + ); + } + + /** + * @brief Convert an in-memory list of RawPacket pointers to image(s). + * + * @param packets A vector of unique_ptr containing raw packet data. + * @param args Variant of parameters for the desired ImageType. + * @param preprocessing_type NONE/HEADER: how to preprocess each RawPacket. + * @param image_type Which image mode to use. + * @param min_image_dim Minimum image dimension threshold. + * @param max_image_dim Maximum image dimension threshold. + * @param min_packets_per_flow Minimum packet count to form a flow (flow-based modes only). + * @param max_packets_per_flow Maximum packet count per flow; extra packets are dropped. + * @param remove_duplicates If true, drop duplicate packets in preprocessing. + * @return UInt8Matrix A list of 2D matrices representing generated image(s). + */ + UInt8Matrix createImageFromPacket( + std::vector>& packets, + const ImageArgsVariant& args, + PacketProcessorType preprocessing_type = PacketProcessorType::NONE, + ImageType image_type = ImageType::PacketImage, + size_t min_image_dim = 0, + size_t max_image_dim = 0, + size_t min_packets_per_flow = 0, + size_t max_packets_per_flow = 0, + bool remove_duplicates = false + ) { + // First, convert RawPacket vector into FIPPacket (which wraps RawPacket and extracts features): + std::vector> processed_packets = + processor.readPacketsList(packets, preprocessing_type, remove_duplicates); + + // Delegate to createMatrix to produce the actual image(s): + return createMatrix( + processed_packets, + preprocessing_type, + image_type, + min_image_dim, + max_image_dim, + min_packets_per_flow, + max_packets_per_flow, + remove_duplicates, + args + ); + } + + /** + * @brief Core dispatcher that builds one or more images from FIPPacket data, based on ImageType. + * + * @param packets Preprocessed packets wrapped in unique_ptr. + * @param preprocessing_type Repeats the chosen preprocessing strategy (just for bookkeeping). + * @param image_type Determines which case in the switch to execute. + * @param min_image_dim Reject images smaller than this dimension. + * @param max_image_dim Reject images larger than this dimension; zero → no limit. + * @param min_packets_per_flow For flow-based modes: skip flows with fewer than this many packets. + * @param max_packets_per_flow For flow-based modes: truncate flows to this many packets. 
+ * @param remove_duplicates If true, drop duplicates in `verify()`. + * @param args A variant containing exactly one of the argument structs required by the chosen ImageType. + * @return UInt8Matrix A list of image matrices; possibly empty if no image passed `verify()`. + * @throws std::runtime_error If `args` is std::monostate or ImageType is invalid. + */ + UInt8Matrix createMatrix( + std::vector>& packets, + PacketProcessorType preprocessing_type, + ImageType image_type, + size_t min_image_dim, + size_t max_image_dim, + size_t min_packets_per_flow, + size_t max_packets_per_flow, + bool remove_duplicates, + const ImageArgsVariant& args + ) { + // Ensure the caller provided a valid argument struct for the chosen image type: + if (std::holds_alternative(args)) { + throw std::runtime_error("Image arguments not initialized for ImageType."); + } + + // If we have a maximum packet‐per‐flow limit, cut the packet list down now: + if (max_packets_per_flow && packets.size() > max_packets_per_flow) { + packets.resize(max_packets_per_flow); + } + + // Convert each FIPPacket into a heiFIPPacketImage (byte vector). + // We do this early so that flow‐based or packet‐based modes can all operate on the same type. + std::vector packets_copy; + packets_copy.reserve(packets.size()); + for (const auto& packetPtr : packets) { + // Extract raw bytes from the FIPPacket’s underlying RawPacket + const uint8_t* packetData = packetPtr->getRawPacket()->getRawData(); + size_t packetLen = packetPtr->getRawPacket()->getRawDataLen(); + + // Copy bytes into a std::vector + std::vector rawData; + rawData.reserve(packetLen); + for (size_t i = 0; i < packetLen; ++i) { + rawData.push_back(packetData[i]); + } + + // Construct a packet‐image wrapper from rawData + packets_copy.emplace_back(rawData); + } + + // Now switch on the image type; each case returns either 1 image (as a single‐element vector) + // or multiple images (e.g., a separate PacketImage for each packet). 
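+        // Aside (illustrative, not part of the original logic): the manual byte-copy loop
+        // used above when filling packets_copy is equivalent to std::vector's
+        // iterator-range constructor, e.g.
+        //
+        //     std::vector<uint8_t> rawData(packetData, packetData + packetLen);
+        //
+        // which performs the same element-wise copy in a single step.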
+ switch (image_type) { + case ImageType::FlowImage: { + // If a flow doesn’t have enough packets, skip entirely: + if (packets.size() < min_packets_per_flow) { + return {}; // Return empty vector + } + + // Extract the arguments specific to FlowImage: + auto flowArgs = std::get(args); + + // Construct a FlowImage: uses packets_copy, desired dimension, fill value, and append flag + FlowImage image(packets_copy, flowArgs.dim, flowArgs.fill, flowArgs.append); + + // Validate the resulting 2D matrix, then return it in a 1-element vector if valid: + if (verify(image, min_image_dim, max_image_dim, remove_duplicates)) { + return { image.get_matrix() }; + } + return {}; + } + + case ImageType::FlowImageTiledFixed: { + if (packets.size() < min_packets_per_flow) { + return {}; + } + + auto tiledArgs = std::get(args); + FlowImageTiledFixed image(packets_copy, tiledArgs.dim, tiledArgs.fill, tiledArgs.cols); + + if (verify(image, min_image_dim, max_image_dim, remove_duplicates)) { + return { image.get_matrix() }; + } + return {}; + } + + case ImageType::FlowImageTiledAuto: { + if (packets.size() < min_packets_per_flow) { + return {}; + } + + auto autoArgs = std::get(args); + FlowImageTiledAuto image(packets_copy, autoArgs.dim, autoArgs.fill, autoArgs.auto_dim); + + if (verify(image, min_image_dim, max_image_dim, remove_duplicates)) { + return { image.get_matrix() }; + } + return {}; + } + + case ImageType::PacketImage: { + // Extract parameters for packet‐level images: + auto packetArgs = std::get(args); + UInt8Matrix images; // We may generate one image per packet + + // Loop through each packet’s raw data, building a packet image: + for (const auto& pktPtr : packets) { + const uint8_t* data = pktPtr->getRawPacket()->getRawData(); + size_t len = pktPtr->getRawPacket()->getRawDataLen(); + + std::vector rawData; + rawData.reserve(len); + for (size_t i = 0; i < len; ++i) { + rawData.push_back(data[i]); + } + + // Create a packet‐level image (dim × dim or auto‐sized): + heiFIPPacketImage image(rawData, packetArgs.dim, packetArgs.fill, packetArgs.auto_dim); + auto matrix = image.get_matrix(); + + // Only include if it passes dimension checks: + if (verify(image, min_image_dim, max_image_dim, remove_duplicates)) { + images.push_back(matrix); + } + } + + return images; + } + + case ImageType::MarkovTransitionMatrixFlow: { + if (packets.size() < min_packets_per_flow) { + return {}; + } + + auto markovFlowArgs = std::get(args); + MarkovTransitionMatrixFlow image(packets_copy, markovFlowArgs.cols); + + if (verify(image, min_image_dim, max_image_dim, remove_duplicates)) { + return { image.get_matrix() }; + } + return {}; + } + + case ImageType::MarkovTransitionMatrixPacket: { + // Packet-level Markov: each packet produces one transition matrix image + UInt8Matrix images; + + for (const auto& pktPtr : packets) { + const uint8_t* data = pktPtr->getRawPacket()->getRawData(); + size_t len = pktPtr->getRawPacket()->getRawDataLen(); + + // Build a raw packet image (byte vector) first: + std::vector rawData; + rawData.reserve(len); + for (size_t i = 0; i < len; ++i) { + rawData.push_back(data[i]); + } + heiFIPPacketImage packetImage(rawData); + + // Now build Markov transition matrix from that packetImage: + MarkovTransitionMatrixPacket image(packetImage); + auto matrix = image.get_matrix(); + + if (verify(image, min_image_dim, max_image_dim, remove_duplicates)) { + images.push_back(matrix); + } + } + + return images; + } + + default: + throw std::runtime_error("Unsupported ImageType passed to createMatrix"); + } + + 
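+        // Aside (illustrative, not part of the original code): each case above relies on
+        // std::get<T>(args) matching the active alternative of the ImageArgsVariant.
+        // std::get throws std::bad_variant_access when the variant holds a different
+        // alternative, e.g.:
+        //
+        //     std::variant<std::monostate, int, double> v = 3;   // holds int
+        //     std::holds_alternative<std::monostate>(v);         // false
+        //     std::get<double>(v);                               // throws std::bad_variant_access
+        //
+        // so callers must pass the argument struct that corresponds to the chosen ImageType.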
// Should never get here because each switch-case returns or throws + return {}; + } + + /** + * @brief Write the first 2D image in a UInt8Matrix vector to disk as a PNG file. + * + * @param img A vector of 2D matrices. Only `img[0]` is used (grayscale). + * @param output_path The desired file path (without extension). A ".png" is appended. + * + * Steps: + * 1. Check that img is non-empty and contains at least one image. + * 2. Interpret img[0] as a grayscale pixel grid: height × width, each pixel 0–255. + * 3. Allocate an OpenCV Mat of type CV_8UC1 (single channel, 8-bit). + * 4. Copy each pixel from the 2D vector into the Mat’s row‐major buffer. + * 5. Ensure parent directory exists by calling std::filesystem::create_directories(). + * 6. Write the Mat to disk using cv::imwrite(..., path + ".png"). + */ + void save_image(const UInt8Matrix& img, const std::string& output_path) { + // Quick sanity check: must have at least one image, and that image must be non-empty + if (img.empty() || img[0].empty() || img[0][0].empty()) { + std::cerr << "[!] Empty image, cannot save: " << output_path << "\n"; + return; + } + + // Work with the first image slice (assuming grayscale) + const auto& grayscale_image = img[0]; + int height = static_cast(grayscale_image.size()); + int width = static_cast(grayscale_image[0].size()); + + // Create an OpenCV Mat of the correct size and type (8‐bit unsigned, single channel) + cv::Mat mat(height, width, CV_8UC1); + + // Copy pixel values row by row + for (int i = 0; i < height; ++i) { + uint8_t* row_ptr = mat.ptr(i); + for (int j = 0; j < width; ++j) { + row_ptr[j] = grayscale_image[i][j]; + } + } + + // Append .png extension and ensure parent directory exists + std::filesystem::path outp(output_path + ".png"); + std::filesystem::create_directories(outp.parent_path()); + + // Write the PNG file to disk + cv::imwrite(outp.string(), mat); + } + +private: + PacketProcessor processor; ///< Responsible for reading pcap data, handling preprocessing, and converting RawPacket → FIPPacket + std::map>, bool> imagesCreatedSet; +}; \ No newline at end of file diff --git a/heiFIP/images/NetworkTrafficImage.hpp b/heiFIP/images/NetworkTrafficImage.hpp new file mode 100644 index 0000000..a5bb3a5 --- /dev/null +++ b/heiFIP/images/NetworkTrafficImage.hpp @@ -0,0 +1,40 @@ +#pragma once + +/** + * @class NetworkTrafficImage + * @brief Base class for all traffic‐based image generators. + * + * Responsibilities: + * - Store common image parameters: a fill value and a base dimension. + * - Provide a common interface (via inheritance) for more specialized traffic image classes + * (e.g., FlowImage, MarkovTransitionMatrixFlow) to share these parameters. + * + * Members: + * _fill : Byte value (0–255) used to pad empty pixels when constructing images. + * _dim : Base dimension (e.g., tile size) used by derived classes as a starting value. + * + * Why: + * - Derived classes may need a default padding value and dimension for their image‐construction logic. + * - By centralizing these fields here, all traffic‐image types can uniformly receive and store them. + */ +class NetworkTrafficImage { +private: + int _fill; ///< Value to pad empty or unused pixels when building images + int _dim; ///< Base dimension (e.g., tile width/height) for derived‐class image logic + +public: + /** + * @brief Constructor: initialize default fill value and dimension. + * + * @param fill Byte value used for padding (default = 0). + * @param dim Base dimension (default = 8). Derived classes may override or use this. 
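+ *
+ * Example of a derived class forwarding these parameters (illustrative sketch only;
+ * it mirrors how FlowImage and the tiled flow images call this constructor):
+ * @code
+ *   class MyTrafficImage : public NetworkTrafficImage {
+ *   public:
+ *       explicit MyTrafficImage(int fill = 0, int dim = 8)
+ *           : NetworkTrafficImage(fill, dim) {}
+ *   };
+ * @endcode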
+ * + * Workflow: + * 1. Store `fill` in _fill. + * 2. Store `dim` in _dim. + * 3. Derived classes inherit these settings for use in their image‐building routines. + */ + NetworkTrafficImage(int fill = 0, int dim = 8) + : _fill(fill), _dim(dim) + {} +}; \ No newline at end of file diff --git a/heiFIP/images/flow.cpp b/heiFIP/images/flow.cpp new file mode 100644 index 0000000..05b0e18 --- /dev/null +++ b/heiFIP/images/flow.cpp @@ -0,0 +1,160 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "NetworkTrafficImage.hpp" +#include "heiFIPPacketImage.cpp" + +/** + * @class FlowImage + * @brief Converts a sequence of heiFIPPacketImage instances (one per flow) into a 2D matrix. + * + * Inherits from NetworkTrafficImage, which provides shared logic for traffic-based images. + * Responsibilities: + * - Accept a vector of packet images representing one flow. + * - Either “append” all packet byte vectors into a single long vector and reshape, + * or lay out each packet’s bytes on its own row, padding to a uniform length. + * - Provide getters for both the tiled matrix and the raw binaries. + */ +class FlowImage : public NetworkTrafficImage { +public: + /** + * @brief Constructor: build a FlowImage from a list of packet‐level images. + * + * @param packets Vector of heiFIPPacketImage, each representing one packet’s bytes. + * @param dim If append=true, width of each row when concatenating all packets. + * If append=false, this is ignored (rows are padded to the maximum packet length). + * @param fill Value (0–255) to pad shorter rows (when not appending) or at end of concatenation. + * @param append If true, concatenate all packet byte arrays into one long vector and then + * split into rows of width=dim. If false, place each packet’s bytes on its own row. + * + * Workflow: + * 1. Call NetworkTrafficImage(fill, dim) to initialize base-class fields (e.g., storing fill and dim). + * 2. Store the input `packets` and `append` flag. + * 3. Call getMatrix(dim, append, fill, packets) to build: + * - matrix: 2D vector representing the flow image. + * - binaries: vector of each packet’s raw byte vector (for reference). + * 4. Store the returned matrix and binaries in member variables. + */ + FlowImage(std::vector packets, int dim = 16, int fill = 0, bool append = false) + : NetworkTrafficImage(fill, dim), packets(packets), append(append) + { + auto result = getMatrix(dim, append, fill, packets); + matrix = std::move(result.first); + binaries = std::move(result.second); + } + + /** + * @brief Get the raw binaries for each packet in the flow. + * @return Reference to the vector of vectors of uint8_t, one per packet. + */ + std::vector>& get_binaries() { + return binaries; + } + + /** + * @brief Get the 2D matrix representing the flow image. + * @return Reference to a 2D vector of size [numRows][numCols]. + * + * If append=true, numRows = ceil(totalBytes / dim) and numCols = dim. + * If append=false, numRows = number of packets and numCols = max packet length. + */ + const std::vector>& get_matrix() const { + return matrix; + } + +private: + std::vector packets; ///< Input packet images for this flow + bool append; ///< Whether to concatenate all packet bytes before reshaping + std::vector> matrix; ///< Resulting 2D image matrix + std::vector> binaries; ///< Original raw byte vectors (one per packet) + + /** + * @brief Build the matrix and store raw binaries depending on the append flag. 
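+ *
+ * Worked example (illustrative): with three packets of 5, 7 and 4 bytes, dim = 8 and
+ * append = true, the concatenated vector fh holds 16 bytes, so rn = ceil(16 / 8) = 2
+ * and the flow image is a 2×8 matrix (no tail padding needed in this case). With
+ * append = false the result is instead a 3×7 matrix, each row padded to the longest
+ * packet length with `fill`.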
+ * + * @param dim Desired width when appending all bytes into one long vector. + * @param append If true, concatenate all packet byte arrays first; otherwise treat each packet separately. + * @param fill Byte value used to pad incomplete rows. + * @param packets Vector of heiFIPPacketImage instances to process. + * @return pair: + * - first: 2D matrix of uint8_t values (each row corresponds to either a flow segment or a packet). + * - second: Raw packet‐byte vectors as originally extracted (“binaries”). + * + * Workflow when append=true: + * 1. For each heiFIPPacketImage in `packets`, call getHexData() to get a vector. + * 2. Append each packet’s bytes in sequence into one long vector `fh`. + * 3. Compute number of rows: rn = ceil(fh.size() / dim). Resize fh to rn*dim by appending zeros. + * 4. Allocate a 2D vector `reshaped` of size [rn][dim]. + * 5. Copy fh[i*dim ... (i+1)*dim−1] into reshaped[i] for i in [0..rn−1]. + * 6. Return {reshaped, binaries}. + * + * Workflow when append=false: + * 1. For each heiFIPPacketImage in `packets`, call getHexData() to get vector `binary`. + * 2. Track the maximum length among all `binary.size()`. + * 3. For each `binary`, create a new row `row = binary` then resize to length=maxLength, filling with `fill`. + * 4. Push `row` into `reshaped`. + * 5. Return {reshaped, binaries}. + */ + std::pair>, std::vector>> + getMatrix(int dim, bool append, int fill, const std::vector& packets) { + std::vector> binaries; + + // 1) Extract raw bytes from each heiFIPPacketImage + for (const auto& packet : packets) { + std::vector hexData = packet.getHexData(); + binaries.push_back(std::move(hexData)); + } + + // If concatenating all packet bytes into one long flow image + if (append) { + std::vector fh; + // a) Append each packet’s bytes into fh + for (const auto& binary : binaries) { + fh.insert(fh.end(), binary.begin(), binary.end()); + } + + // b) Compute number of rows needed and pad with zeros + int rn = static_cast(fh.size()) / dim + (fh.size() % dim > 0 ? 1 : 0); + fh.resize(rn * dim, static_cast(0)); // Pad tail to make length = rn*dim + + // c) Reshape into a 2D matrix of size [rn][dim] + std::vector> reshaped(rn, std::vector(dim)); + for (int i = 0; i < rn; ++i) { + std::copy( + fh.begin() + i * dim, + fh.begin() + (i + 1) * dim, + reshaped[i].begin() + ); + } + + return { reshaped, binaries }; + } + // If placing each packet’s bytes on its own row + else { + // a) Determine maximum packet length + size_t maxLength = 0; + for (const auto& binary : binaries) { + maxLength = std::max(maxLength, binary.size()); + } + + // b) Build one row per packet, padding each to maxLength with `fill` + std::vector> reshaped; + reshaped.reserve(binaries.size()); + for (const auto& binary : binaries) { + std::vector row = binary; // Copy raw bytes + row.resize(maxLength, static_cast(fill)); // Pad to uniform length + reshaped.push_back(std::move(row)); + } + + return { reshaped, binaries }; + } + } +}; \ No newline at end of file diff --git a/heiFIP/images/flow_tiled_auto.cpp b/heiFIP/images/flow_tiled_auto.cpp new file mode 100644 index 0000000..cf9e79f --- /dev/null +++ b/heiFIP/images/flow_tiled_auto.cpp @@ -0,0 +1,256 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "heiFIPPacketImage.cpp" +#include "NetworkTrafficImage.hpp" + +/** + * @class FlowImageTiledAuto + * @brief Builds a square, tiled image from a sequence of packet images, automatically determining tile dimensions. 
+ * + * Inherits from NetworkTrafficImage, which stores a default fill value and base dimension. + * Responsibilities: + * - Convert each packet’s raw bytes into its own dim×dim tile, padding/truncating as needed. + * - Arrange all those tiles into a larger square grid (dim_total×dim_total), where dim_total = ceil(sqrt(numTiles)). + * - Provide getters for the final tiled matrix and the original per-packet binaries. + */ +class FlowImageTiledAuto : public NetworkTrafficImage { +public: + /** + * @brief Constructor: prepare tiled flow image using automatic dimension calculation if requested. + * + * @param packets Vector of heiFIPPacketImage, each containing raw bytes for one packet. + * @param dim Base dimension for each packet’s tile (width = height = dim) if auto_dim=false. + * If auto_dim=true, each tile’s dim is recalculated as ceil(sqrt(maxPacketLength)). + * @param fill Byte value (0–255) used to pad shorter packet byte arrays when building each tile. + * @param auto_dim If true, automatically set each tile’s dim = ceil(sqrt(max length among all packets)). + * + * Workflow: + * 1. Call NetworkTrafficImage(fill, dim) to store base fill and dim. + * 2. Store given `packets` and `auto_dim` flag in members. + * 3. Call get_matrix_tiled(fill, dim, auto_dim, packets), which: + * a. Extracts raw bytes from each packet image. + * b. Finds max packet length; if auto_dim, compute dim = ceil(sqrt(maxLength)). + * c. For each packet, reshape its bytes into a dim×dim tile (row-major), padding with `fill`. + * d. Compute dim_total = ceil(sqrt(numPackets)). + * e. Arrange all packet tiles into a dim_total×dim_total grid by: + * • Placing tiles row by row, concatenating horizontally via npconcatenate(). + * • Padding with zero tiles (via npzero()) if fewer than dim_total² packets. + * f. Return {tiledMatrix, binaries}, where binaries is the vector of each packet’s raw byte vector. + * 4. Store the returned tiled matrix and binaries in member variables. + */ + FlowImageTiledAuto(const std::vector& packets, int dim = 16, int fill = 0, bool auto_dim = false) + : NetworkTrafficImage(fill, dim), packets(packets), auto_dim(auto_dim) + { + auto result = get_matrix_tiled(fill, dim, auto_dim, packets); + matrix = std::move(result.first); + binaries = std::move(result.second); + } + + /** + * @brief Get the final tiled image matrix (square of tiles stacked). + * @return Reference to a 2D vector of size [dim_total*dim][dim_total*dim]. + */ + const std::vector>& get_matrix() const { + return matrix; + } + + /** + * @brief Get the raw byte vectors for each packet (binaries used to build tiles). + * @return Reference to a vector of vectors, one per packet. + */ + std::vector>& get_binaries() { + return binaries; + } + +private: + std::vector packets; ///< Input packet images + bool auto_dim; ///< Whether to recalc tile dim = ceil(sqrt(maxPacketLength)) + std::vector> matrix; ///< Final tiled flow image + std::vector> binaries; ///< Raw byte vectors for each packet + + /** + * @brief Build per-packet tiles and assemble them into one large square matrix. + * + * @param fill Byte value to use when padding individual packet tiles. + * @param dim Base dimension for each packet tile (unless overridden by auto_dim). + * @param auto_dim If true, recompute dim = ceil(sqrt(max packet length)). + * @param packets Vector of heiFIPPacketImage, each containing raw bytes for one packet. + * @return pair: + * - first: 2D tiled image (size = dim_total*dim × dim_total*dim). + * - second: Original raw byte vectors (for reference). 
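+ *
+ * Worked example (illustrative): if the longest packet in the flow has 300 bytes and
+ * auto_dim = true, then dim = ceil(sqrt(300)) = 18, so each packet becomes an 18×18
+ * tile (324 cells, the last 24 padded with `fill`). With 5 packets, dim_total =
+ * ceil(sqrt(5)) = 3, giving a 3×3 grid of tiles and a final image of 54×54 pixels,
+ * where the 4 unused grid slots are zero tiles.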
+ * + * Workflow: + * 1. Extract raw bytes from each packet (packet.getHexData()) into `binaries`. + * 2. Determine max packet length across all binaries. + * 3. If auto_dim=true, set dim = ceil(sqrt(maxLength)). + * 4. For each packet’s byte vector `x`: + * a. Allocate a dim×dim tile, initialized to `fill`. + * b. Copy x[k] into tile[i][j] for k from 0 to x.size()-1, filling row-major: + * • i = k / dim, j = k % dim; stop when k ≥ x.size() or out of bounds. + * c. Store that tile in a temporary list `result` (vector of 2D arrays). + * 5. Compute dim_total = ceil(sqrt(numPackets)) → number of tiles per row/column. + * 6. Call tile_images(result, dim_total, dim) to arrange tiles into one big matrix: + * a. Build rows of concatenated tiles horizontally: each row has dim_total tiles side by side. + * Use npzero(dim) to fill missing tiles if numPackets < dim_total². + * Use npconcatenate() to join tiles horizontally (rows must have same height=dim). + * b. After building each row (dim rows high, width = dim_total*dim), stack all rows vertically. + * 7. Return {tiledMatrix, binaries}. + */ + std::pair>, std::vector>> + get_matrix_tiled(int fill, int dim, bool auto_dim, const std::vector& packets) { + // 1) Extract raw bytes from each packet and push into binaries + std::vector> binaries; + for (const heiFIPPacketImage& packet : packets) { + binaries.push_back(packet.getHexData()); + } + + // 2) Determine the maximum length among all packet byte vectors + size_t length = 0; + for (const auto& b : binaries) { + length = std::max(length, b.size()); + } + + // 3) If auto_dim=true, set each tile’s dim = ceil(sqrt(length)) + if (auto_dim) { + dim = static_cast(std::ceil(std::sqrt(static_cast(length)))); + } + + // 4) Build a 3D list of per-packet dim×dim tiles + std::vector>> result; + for (const auto& x : binaries) { + // a) Initialize a dim×dim tile with `fill` + std::vector> reshaped(dim, std::vector(dim, static_cast(fill))); + + // b) Copy x[k] into reshaped row-major until x is exhausted or tile is filled + size_t k = 0; + for (int i = 0; i < dim && k < x.size(); ++i) { + for (int j = 0; j < dim && k < x.size(); ++j) { + reshaped[i][j] = x[k++]; + } + } + result.push_back(std::move(reshaped)); + } + + // 5) Compute dim_total = ceil(sqrt(number of tiles)) → grid is dim_total×dim_total tiles + size_t length_total = result.size(); + uint dim_total = static_cast(std::ceil(std::sqrt(static_cast(length_total)))); + + // 6) Arrange all tiles into a large tiled image + std::vector> fh = tile_images(result, dim_total, dim); + return { fh, binaries }; + } + + /** + * @brief Create a dim×dim tile filled with zeros. + * + * @param dim Dimension for both width and height. + * @return 2D vector of size [dim][dim], all elements = 0. + * + * Why: + * - Used to fill grid slots when numPackets < dim_total², ensuring the final image remains square. + */ + std::vector> npzero(size_t dim) { + return std::vector>(dim, std::vector(dim, static_cast(0))); + } + + /** + * @brief Horizontally concatenate two same-height images (2D arrays). + * + * @param img1 First image: vector of rows, each row is a vector. + * @param img2 Second image: must have same number of rows as img1. + * @return Concatenated image: each row is img1[row] followed by img2[row]. + * + * Throws: + * - std::invalid_argument if img1 and img2 have different heights. + * + * Why: + * - Used in tile_images() to join tiles side by side when building each row of the grid. 
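+ *
+ * Example (illustrative): concatenating a 2×2 block with a 2×3 block yields a 2×5 block:
+ * @code
+ *   std::vector<std::vector<uint8_t>> a{{1, 2}, {3, 4}};
+ *   std::vector<std::vector<uint8_t>> b{{5, 6, 7}, {8, 9, 10}};
+ *   auto c = npconcatenate(a, b);   // c == {{1, 2, 5, 6, 7}, {3, 4, 8, 9, 10}}
+ * @endcode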
+ */ + std::vector> npconcatenate(const std::vector>& img1, + const std::vector>& img2) + { + if (img1.empty()) return img2; + if (img2.empty()) return img1; + + if (img1.size() != img2.size()) { + throw std::invalid_argument("Images must have the same number of rows to concatenate horizontally."); + } + + std::vector> result = img1; + for (size_t i = 0; i < result.size(); ++i) { + result[i].insert(result[i].end(), img2[i].begin(), img2[i].end()); + } + return result; + } + + /** + * @brief Arrange a list of per-packet tiles into a single large square image. + * + * @param images 3D vector: [numTiles][dim][dim], each is a dim×dim tile. + * @param cols Number of tiles per row/column in the final grid (dim_total). + * @param dim Dimension of each tile (width = height = dim). + * @return 2D vector of size [dim_total*dim][dim_total*dim], the tiled image. + * + * Workflow: + * 1. For each row i in [0..cols−1]: + * a. Initialize an empty 2D array `row` (to accumulate tile rows). + * b. For each column j in [0..cols−1]: + * - If k < images.size(), let im = images[k], else let im = npzero(dim). + * - If `row` is empty, set row = im; else row = npconcatenate(row, im). + * - Increment k. + * c. Append `row` to `rows` (vector of row-blocks). + * 2. Initialize `tiled` = rows[0]. + * 3. For i in [1..rows.size()−1], append rows[i] to the bottom of `tiled` using vector::insert. + * 4. Return `tiled`, which now has height = cols*dim and width = cols*dim. + * + * Why: + * - Ensures that if there are fewer tiles than cols², the missing slots are zero-filled, maintaining a square. + * - Maintains row-major order: first fill the top-left tile, then the next tile to its right, etc. + */ + std::vector> tile_images(const std::vector>>& images, + const uint cols, const uint dim) + { + std::vector>> rows; + size_t k = 0; // Tracks which tile we’re on + + // 1) Build each tile row (concatenate tiles horizontally) + for (size_t i = 0; i < cols; ++i) { + std::vector> row; // Start with an empty row-block + for (size_t j = 0; j < cols; ++j) { + std::vector> im; + if (k < images.size()) { + im = images[k]; // Use actual tile + } else { + im = npzero(dim); // Use zero tile if no more packets + } + + if (row.empty()) { + row = std::move(im); + } else { + row = npconcatenate(row, im); + } + ++k; + } + rows.push_back(std::move(row)); + } + + // 2) Stack all rows vertically to form the final tiled image + std::vector> tiled = std::move(rows[0]); + for (size_t i = 1; i < rows.size(); ++i) { + tiled.insert(tiled.end(), rows[i].begin(), rows[i].end()); + } + return tiled; + } +}; \ No newline at end of file diff --git a/heiFIP/images/flow_tiled_fixed.cpp b/heiFIP/images/flow_tiled_fixed.cpp new file mode 100644 index 0000000..cff317d --- /dev/null +++ b/heiFIP/images/flow_tiled_fixed.cpp @@ -0,0 +1,237 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "heiFIPPacketImage.cpp" +#include "NetworkTrafficImage.hpp" + +/** + * @class FlowImageTiledFixed + * @brief Builds a fixed-grid tiled image from a sequence of packet images. + * + * Inherits from NetworkTrafficImage, which provides base logic for traffic-based images. + * Responsibilities: + * - Convert each packet’s raw bytes into its own dim×dim tile, padding/truncating as needed. + * - Arrange all those tiles into a fixed-size grid with `cols` tiles per row and per column. + * - Provide getters for both the tiled matrix and the original per-packet binaries. 
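+ *
+ * Usage sketch (illustrative; the byte values are made up, and heiFIPPacketImage is
+ * constructed from a raw byte vector as elsewhere in this diff):
+ * @code
+ *   std::vector<heiFIPPacketImage> pkts;
+ *   pkts.emplace_back(std::vector<uint8_t>{0x45, 0x00, 0x00, 0x3c});
+ *   pkts.emplace_back(std::vector<uint8_t>{0x45, 0x00, 0x01, 0x20, 0xaa});
+ *   FlowImageTiledFixed img(pkts, 8, 0, 2);   // dim = 8, fill = 0, cols = 2
+ *   const auto& m = img.get_matrix();         // 16×16 matrix (cols*dim × cols*dim)
+ * @endcode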
+ */ +class FlowImageTiledFixed : public NetworkTrafficImage { +public: + /** + * @brief Constructor: prepare tiled flow image using a fixed number of columns. + * + * @param packets Vector of heiFIPPacketImage, each containing raw bytes for one packet. + * @param dim Dimension for each packet’s tile (width = height = dim). + * @param fill Byte value (0–255) used to pad shorter packet byte arrays when building each tile. + * @param cols Number of tiles per row (and column) in the final grid. Grid is cols×cols tiles. + * + * Workflow: + * 1. Call NetworkTrafficImage(fill, dim) to store base fill and dim. + * 2. Store input `packets` and `cols` in member variables. + * 3. Call get_matrix_tiled(fill, dim, packets), which: + * a. Extracts raw bytes from each packet image into `binaries`. + * b. For each packet’s byte vector `x`: + * i. Allocate a dim×dim tile, initialized to `fill`. + * ii. Copy x[k] into tile[i][j] in row-major until x is exhausted or tile is filled. + * iii. Append that tile to a local list `result` (vector of 2D arrays). + * c. Call tile_images(result, cols, dim) to arrange exactly cols×cols tiles: + * i. Place tiles row by row, concatenating horizontally with npconcatenate(). + * ii. If there are fewer than cols² tiles, use npzero(dim) to fill missing slots. + * iii. Stack all rows vertically to form the final matrix. + * 4. Store the returned matrix and binaries in member variables. + */ + FlowImageTiledFixed(const std::vector& packets, int dim = 16, int fill = 0, int cols = 3) + : NetworkTrafficImage(fill, dim), packets(packets), cols(cols) + { + auto result = get_matrix_tiled(fill, dim, packets); + matrix = std::move(result.first); + binaries = std::move(result.second); + } + + /** + * @brief Get the final tiled image matrix (fixed size: cols*dim by cols*dim). + * @return Reference to a 2D vector representing the tiled image. + */ + const std::vector>& get_matrix() const { + return matrix; + } + + /** + * @brief Get the raw byte vectors for each packet (binaries used to build tiles). + * @return Reference to a vector of vectors, one per packet’s bytes. + */ + std::vector>& get_binaries() { + return binaries; + } + +private: + std::vector packets; ///< Input packet images + int cols; ///< Number of tiles per row/column + std::vector> matrix; ///< Final tiled flow image + std::vector> binaries; ///< Raw byte vectors for each packet + + /** + * @brief Build per-packet tiles and assemble them into a fixed-size grid. + * + * @param fill Byte value to use when padding individual packet tiles. + * @param dim Dimension for each packet tile (width = height = dim). + * @param packets Vector of heiFIPPacketImage, each containing raw bytes for one packet. + * @return pair: + * - first: 2D tiled image (size = cols*dim × cols*dim). + * - second: Original raw byte vectors (for reference). + * + * Workflow: + * 1. Extract raw bytes from each packet (packet.getHexData()) into `binaries`. + * 2. For each packet’s byte vector `x`: + * a. Allocate a dim×dim tile, initialized to `fill`. + * b. Copy x[k] into tile[i][j] in row-major until x is exhausted or tile is filled. + * c. Append that tile to a local list `result` (vector of 2D tiles). + * 3. Call tile_images(result, cols, dim) to arrange exactly cols×cols tiles: + * a. Iterate over `cols` rows; for each row, iterate `cols` columns: + * • If a tile is available (k < result.size()), use it; else use npzero(dim). + * • Concatenate horizontally onto `row` via npconcatenate(). + * b. Append each completed `row` to `rows`. + * c. 
Stack all `rows` vertically into one matrix: first row, then subsequent rows appended. + * 4. Return {tiledMatrix, binaries}. + */ + std::pair>, std::vector>> + get_matrix_tiled(int fill, int dim, const std::vector& packets) { + std::vector> binaries; + + // 1) Extract raw bytes from each heiFIPPacketImage + for (const heiFIPPacketImage& packet : packets) { + binaries.push_back(packet.getHexData()); + } + + // 2) Build a dim×dim tile for each packet’s bytes + std::vector>> result; + for (const auto& x : binaries) { + // a) Initialize a dim×dim tile filled with `fill` + std::vector> reshaped(dim, std::vector(dim, static_cast(fill))); + // b) Copy bytes into reshaped row-major + size_t k = 0; + for (size_t i = 0; i < static_cast(dim) && k < x.size(); ++i) { + for (size_t j = 0; j < static_cast(dim) && k < x.size(); ++j) { + reshaped[i][j] = x[k++]; + } + } + result.push_back(std::move(reshaped)); + } + + // 3) Arrange the tiles into a fixed cols×cols grid + std::vector> fh = tile_images(result, static_cast(cols), static_cast(dim)); + return { fh, binaries }; + } + + /** + * @brief Create a dim×dim tile filled entirely with zeros. + * + * @param dim Dimension for both width and height. + * @return 2D vector of size [dim][dim], all elements = 0. + * + * Why: + * - Used in tile_images() to fill missing slots when fewer than cols² packets are available. + */ + std::vector> npzero(size_t dim) { + return std::vector>(dim, std::vector(dim, static_cast(0))); + } + + /** + * @brief Horizontally concatenate two same-height images (2D arrays). + * + * @param img1 First image: vector of rows, each row is a vector. + * @param img2 Second image: must have same number of rows as img1. + * @return Concatenated image: each row is img1[row] followed by img2[row]. + * + * Throws: + * - std::invalid_argument if img1 and img2 have different heights. + * + * Why: + * - Used in tile_images() to join tiles side by side when building each row of the grid. + */ + std::vector> npconcatenate(const std::vector>& img1, + const std::vector>& img2) + { + if (img1.empty()) return img2; + if (img2.empty()) return img1; + + if (img1.size() != img2.size()) { + throw std::invalid_argument("Images must have the same number of rows to concatenate horizontally."); + } + + std::vector> result = img1; + for (size_t i = 0; i < result.size(); ++i) { + result[i].insert(result[i].end(), img2[i].begin(), img2[i].end()); + } + return result; + } + + /** + * @brief Arrange a list of per-packet tiles into one large fixed-grid image. + * + * @param images 3D vector: [numTiles][dim][dim], each is a dim×dim tile. + * @param cols Number of tiles per row/column in the final grid (fixed). + * @param dim Dimension of each tile (width = height = dim). + * @return 2D vector of size [cols*dim][cols*dim], the tiled image. + * + * Workflow: + * 1. For each row i in [0..cols−1]: + * a. Initialize an empty 2D array `row`. + * b. For each column j in [0..cols−1]: + * - If k < images.size(), let im = images[k]; else im = npzero(dim). + * - If `row` is empty, set row = im; else row = npconcatenate(row, im). + * - Increment k. + * c. Append this completed `row` (size = dim rows, width = cols*dim) into `rows`. + * 2. Initialize `tiled` = rows[0]. + * 3. For i in [1..rows.size()−1], append rows[i] to the bottom of `tiled`. + * 4. Return `tiled`, which now has height = cols*dim and width = cols*dim. 
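+ *
+ * Worked example (illustrative): with 5 tiles, cols = 3 and dim = 8, the grid has
+ * 3×3 = 9 slots, so the last 4 slots are filled with npzero(8) tiles and the final
+ * image is 24×24 pixels.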
+ * + * Why: + * - Having a fixed number of columns ensures a consistent final image size even if the number + * of packets < cols² (missing slots become zero-filled tiles). + */ + std::vector> tile_images(const std::vector>>& images, + const uint cols, const uint dim) + { + std::vector>> rows; + size_t k = 0; // Tracks which tile we’re on + + // 1) Build each tile row (concatenate tiles horizontally) + for (size_t i = 0; i < cols; ++i) { + std::vector> row; // Start with an empty row-block + for (size_t j = 0; j < cols; ++j) { + std::vector> im; + if (k < images.size()) { + im = images[k]; // Use actual tile + } else { + im = npzero(dim); // Zero tile if fewer than cols² packets + } + + if (row.empty()) { + row = std::move(im); + } else { + row = npconcatenate(row, im); + } + ++k; + } + rows.push_back(std::move(row)); + } + + // 2) Stack all rows vertically to form the final tiled image + std::vector> tiled = std::move(rows[0]); + for (size_t i = 1; i < rows.size(); ++i) { + tiled.insert(tiled.end(), rows[i].begin(), rows[i].end()); + } + return tiled; + } +}; \ No newline at end of file diff --git a/heiFIP/images/markov_chain.cpp b/heiFIP/images/markov_chain.cpp new file mode 100644 index 0000000..047a410 --- /dev/null +++ b/heiFIP/images/markov_chain.cpp @@ -0,0 +1,270 @@ +#pragma once + +#include +#include +#include +#include +#include "heiFIPPacketImage.cpp" +#include "NetworkTrafficImage.hpp" + +/** + * @class MarkovTransitionMatrix + * @brief Base class for computing a normalized, grayscale Markov transition matrix from a sequence of symbols. + * + * Responsibilities: + * - Given a 1D vector of “transitions” (values in [0..15]), count the transitions between consecutive symbols. + * - Normalize each row of the count matrix so that probabilities sum to 1, then scale to [0..255]. + * - Return the resulting 16×16 matrix of uint8_t intensities. + */ +class MarkovTransitionMatrix : public NetworkTrafficImage { +public: + /** + * @brief Compute a 16×16 Markov transition matrix from a sequence of 4-bit symbols. + * + * @param transitions Vector of length L containing values 0..15. Each adjacent pair + * (transitions[k], transitions[k+1]) contributes to the count at [i][j]. + * @return 2D vector of size [16][16], where each cell holds a normalized probability + * scaled to [0..255]. Rows with zero total count remain all zeros. + * + * Workflow: + * 1. Allocate a 16×16 uintMatrix initialized to zero (counts of each transition). + * 2. For k in [0..L-2], let i = transitions[k], j = transitions[k+1]; increment uintMatrix[i][j]. + * 3. For each row i in uintMatrix: + * a. Compute sum = Σ_j uintMatrix[i][j]. + * b. If sum > 0, for each j: compute probability = uintMatrix[i][j] / sum. + * Then multiply by 255, clamp to [0..255], and store back as uint8_t. + * 4. Return the resulting 16×16 grayscale matrix. + * + * Why: + * - Captures the first-order Markov distribution between successive 4-bit values in a packet’s bit array. + * - Scaling to 0–255 yields a grayscale image representation suitable for CNNs or other image-based analysis. 
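+ *
+ * Worked example (illustrative): for transitions = {1, 2, 1, 2, 1} the observed pairs
+ * are (1,2), (2,1), (1,2), (2,1). Row 1 and row 2 each sum to 2 and place all of their
+ * mass in a single cell, so the result has matrix[1][2] == 255 and matrix[2][1] == 255
+ * while every other cell stays 0.
+ * @code
+ *   MarkovTransitionMatrix mtm;
+ *   auto m = mtm.transition_matrix({1, 2, 1, 2, 1});
+ *   // m[1][2] == 255, m[2][1] == 255, all other entries == 0
+ * @endcode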
+ */ + std::vector> transition_matrix(const std::vector& transitions) { + const size_t n = 16; + // 1) Initialize a 16×16 count matrix to zero + std::vector> uintMatrix(n, std::vector(n, 0)); + + // 2) Count transitions between consecutive symbols + for (size_t k = 0; k + 1 < transitions.size(); ++k) { + size_t i = transitions[k]; + size_t j = transitions[k + 1]; + uintMatrix[i][j] += 1; + } + + // 3) Normalize each row to probabilities and scale to [0..255] + for (auto& row : uintMatrix) { + double sum = 0.0; + // Compute total count for this row + for (double value : row) { + sum += value; + } + if (sum > 0.0) { + // Convert each count to a probability, multiply by 255, clamp, and cast to uint8_t + for (auto& value : row) { + double prob = static_cast(value) / sum; + double scaled = prob * 255.0; + // clamp to [0..255] + value = static_cast(std::clamp(scaled, 0.0, 255.0)); + } + } + // If sum == 0, leave row as all zeros + } + + return uintMatrix; + } +}; + +/** + * @class MarkovTransitionMatrixFlow + * @brief Builds a larger image by computing a Markov transition matrix for each packet in a flow, + * then arranging all 16×16 matrices into a fixed grid of tiles. + * + * Inherits from MarkovTransitionMatrix to leverage the transition_matrix() method. + * Responsibilities: + * - For each heiFIPPacketImage in `packets`, extract its 4-bit bit array and compute a 16×16 matrix. + * - Tile all per-packet matrices into a grid with `cols` tiles per row and column. + * - Store the final tiled matrix as a single 2D vector, accessible via get_matrix(). + */ +class MarkovTransitionMatrixFlow : public MarkovTransitionMatrix { +public: + /** + * @brief Constructor: compute and tile per-packet Markov matrices. + * + * @param packets Vector of heiFIPPacketImage, each representing one packet in the flow. + * @param cols Number of tiles per row and per column in the final grid (grid is cols×cols). + * + * Workflow: + * 1. Store `packets` and `cols`. + * 2. For each packet in `packets`: + * a. Call packet.bit_array() to get a vector of 4-bit values. + * b. Pass that vector to transition_matrix() to get a 16×16 grayscale matrix. + * c. Append that 16×16 matrix to a local list `result`. + * 3. Call tile_images(result, cols, 16) to arrange all 16×16 matrices into one large image: + * - Creates a cols×cols grid of 16×16 tiles. + * - If fewer than cols² matrices, fill missing spots with zero tiles (npzero). + * - Concatenate horizontally then vertically as necessary. + * 4. Store the final tiled image in member `matrix`. + */ + MarkovTransitionMatrixFlow(const std::vector& packets, uint cols = 4) + : packets(packets), cols(cols) + { + std::vector>> result; + // 2) Compute a 16×16 Markov matrix for each packet + for (const heiFIPPacketImage& packet : packets) { + std::vector transition = packet.bit_array(); + std::vector> m = transition_matrix(transition); + result.push_back(std::move(m)); + } + // 3) Tile all 16×16 matrices into a cols×cols grid + matrix = tile_images(result, cols, 16); + } + + /// Accessor for the final tiled flow image + const std::vector>& get_matrix() const { + return matrix; + } + +private: + std::vector packets; ///< Each packet in the flow + uint cols; ///< Number of tiles per row/column + MarkovTransitionMatrix transitionMatrix; ///< Base class instance (not strictly necessary) + std::vector> matrix; ///< Final tiled image composed of 16×16 tiles + + /** + * @brief Create a 16×16 tile filled with zeros (if a packet’s matrix is missing). 
+ * + * @param dim Tile dimension (16 for Markov matrices). + * @return 2D vector of size [dim][dim], all zeros. + */ + std::vector> npzero(size_t dim) { + return std::vector>(dim, std::vector(dim, 0)); + } + + /** + * @brief Horizontally concatenate two same-height images (2D arrays). + * + * @param img1 First image: vector of rows, each row is a vector. + * @param img2 Second image: must have the same number of rows as img1. + * @return Concatenated image: each row is img1[row] followed by img2[row]. + * + * Throws: + * - std::invalid_argument if img1 and img2 have different heights. + * + * Why: + * - Used in tile_images() to join 16×16 tiles side by side when constructing each grid row. + */ + std::vector> npconcatenate(const std::vector>& img1, + const std::vector>& img2) + { + if (img1.empty()) return img2; + if (img2.empty()) return img1; + + if (img1.size() != img2.size()) { + throw std::invalid_argument("Images must have the same number of rows to concatenate horizontally."); + } + + std::vector> result = img1; + for (size_t i = 0; i < result.size(); ++i) { + result[i].insert(result[i].end(), img2[i].begin(), img2[i].end()); + } + return result; + } + + /** + * @brief Arrange a list of 16×16 tiles into one large square image of size [cols*dim][cols*dim]. + * + * @param images 3D vector: [numTiles][16][16], each a 16×16 grayscale matrix. + * @param cols Number of tiles per row/column in the final grid. + * @param dim Dimension of each tile (16). + * @return 2D vector of size [cols*dim][cols*dim], the tiled image. + * + * Workflow: + * 1. Initialize an empty vector `rows` to hold each combined grid-row. + * 2. Set k = 0 to track current tile index. + * 3. For each row i in [0..cols−1]: + * a. Initialize an empty 16×0 “row” block. + * b. For j in [0..cols−1]: + * - If k < images.size(), let im = images[k]; else use a zero tile npzero(dim). + * - If row is empty, set row = im; else row = npconcatenate(row, im). + * - Increment k. + * c. Append the completed row block (size = dim rows, width = cols*dim) to `rows`. + * 4. Initialize `tiled` = rows[0]. + * 5. For each subsequent row i in [1..rows.size()−1], append rows[i] to the bottom of `tiled`. + * 6. Return `tiled`. + * + * Why: + * - Ensures that if there are fewer than cols² packets, the missing grid slots are zero-filled tiles, + * preserving a square final image of consistent size. + */ + std::vector> tile_images(const std::vector>>& images, + const uint cols, const uint dim) + { + std::vector>> rows; + size_t k = 0; // Tracks which tile index we’re on + + // 1) Build each row of the tile grid + for (size_t i = 0; i < cols; ++i) { + std::vector> row; // Combined row of tiles + for (size_t j = 0; j < cols; ++j) { + std::vector> im; + if (k < images.size()) { + im = images[k]; // Use actual 16×16 tile + } else { + im = npzero(dim); // Use zero tile if no more images + } + + if (row.empty()) { + row = std::move(im); + } else { + row = npconcatenate(row, im); + } + ++k; + } + rows.push_back(std::move(row)); + } + + // 2) Stack all rows vertically to form final tiled image + std::vector> tiled = std::move(rows[0]); + for (size_t i = 1; i < rows.size(); ++i) { + tiled.insert(tiled.end(), rows[i].begin(), rows[i].end()); + } + return tiled; + } +}; + +/** + * @class MarkovTransitionMatrixPacket + * @brief Computes a single 16×16 Markov transition matrix for one packet and exposes it as an image. + * + * Inherits from MarkovTransitionMatrix to reuse transition_matrix(). 
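+ *
+ * Usage sketch (illustrative; heiFIPPacketImage is built from a raw byte vector, as
+ * elsewhere in this diff):
+ * @code
+ *   heiFIPPacketImage pkt(std::vector<uint8_t>{0x12, 0x34, 0x56, 0x78});
+ *   MarkovTransitionMatrixPacket mtp(pkt);
+ *   const auto& m = mtp.get_matrix();   // single 16×16 transition matrix
+ * @endcode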
+ * Responsibilities: + * - Given one heiFIPPacketImage, extract its 4-bit bit array. + * - Compute the 16×16 transition matrix and store it as `matrix`. + * - Provide get_matrix() to retrieve that single matrix. + */ +class MarkovTransitionMatrixPacket : public MarkovTransitionMatrix { +public: + /** + * @brief Constructor: compute the Markov transition matrix for a single packet. + * + * @param packet heiFIPPacketImage containing raw packet bytes. + * + * Workflow: + * 1. Call packet.bit_array() to get a vector of 4-bit values. + * 2. Call transition_matrix(transition) to produce a 16×16 grayscale matrix. + * 3. Store the resulting matrix in member `matrix`. + */ + MarkovTransitionMatrixPacket(const heiFIPPacketImage packet) : packet(packet) { + std::vector transition = packet.bit_array(); + matrix = transition_matrix(transition); + } + + /// Accessor for the computed 16×16 matrix + const std::vector>& get_matrix() const { + return matrix; + } + +private: + heiFIPPacketImage packet; ///< The raw packet image to process + std::vector> matrix; ///< Resulting 16×16 transition matrix +}; \ No newline at end of file diff --git a/heiFIP/layers/dns.cpp b/heiFIP/layers/dns.cpp new file mode 100644 index 0000000..5ca48fc --- /dev/null +++ b/heiFIP/layers/dns.cpp @@ -0,0 +1,197 @@ +#pragma once + +#include +#include +#include + +#include "transport.cpp" + +/** + * @class DNSPacket + * @brief Extends TransportPacket to handle DNS-specific header and resource-record manipulation. + * + * Responsibilities: + * - Inherit all Ethernet, IP, and transport-level rewriting and hashing logic. + * - Locate the pcpp::DnsLayer, inspect query/answer/authority/additional sections. + * - Insert CustomDNS header and individual CustomDNSQR/CustomDNSRR layers for each record. + * - Remove the original DnsLayer and recompute checksums/lengths. + */ +class DNSPacket : public TransportPacket { +public: + /** + * @brief Constructor: delegates raw-packet ownership and layer maps to TransportPacket. + * + * @param rawPacketPointer unique_ptr to the raw pcpp::RawPacket containing full packet bytes. + * @param addressMapping Mapping of original→new MAC/IP addresses (populated previously). + * @param layerMap Map of protocol layers present (Ethernet, IP, TCP/UDP, DNS). + * + * Workflow: + * 1. Calls TransportPacket’s constructor, which in turn: + * - Rewrites Ethernet MACs (EtherPacket). + * - Rewrites IP addresses and computes IP-header hash (IPPacket). + * - Computes transport-layer hash and optionally strips payload (TransportPacket). + * 2. No DNS-specific work is done here; header_preprocessing() does the heavy lifting. + */ + DNSPacket(std::unique_ptr rawPacketPointer, + std::unordered_map addressMapping = {}, + std::unordered_map layerMap = {}) + : TransportPacket(std::move(rawPacketPointer), addressMapping, layerMap) + { + // Base constructor handles transport-layer setup; DNS logic in header_preprocessing(). + } + + /** + * @brief Insert CustomDNS, CustomDNSQR, and CustomDNSRR layers as needed. + * + * Workflow: + * 1. Locate the existing DnsLayer via Packet.getLayerOfType(). + * If none, return immediately. + * 2. For each DNS section (question, answer, authority, additional): + * a. If the section count > 0, call headerPreprocessingMessageType() with the DnsLayer and + * the section code ("qd" for questions, "an" for answers, "ns" for authority, "ar" for additional). + * b. 
headerPreprocessingMessageType() will iterate through each record in that section, + * create a new CustomDNSQR (for questions) or CustomDNSRR (for resource records), and add it. + * c. After adding each CustomDNSQR/CustomDNSRR, call Packet.computeCalculateFields() to update checksums. + * + * 3. After populating individual record layers, rebuild the DNS header itself: + * a. Retrieve the (possibly updated) DnsLayer via Packet.getLayerOfType(). + * b. Read fields from its dnshdr (queryOrResponse, opcode, aa, tc, rd, ra, z, ad, cd, rcode, and section counts). + * c. Build a new CustomDNS instance with those header fields. + * d. Insert CustomDNS into the packet just before the old DnsLayer’s position. + * e. Detach and delete the original DnsLayer. + * f. Call Packet.computeCalculateFields() to recalculate lengths and checksums upstream of the new DNS header. + */ + void header_preprocessing() override { + // 1) Find the existing DnsLayer for preprocessing + pcpp::DnsLayer* oldDNSForMessageProcessing = Packet.getLayerOfType(); + if (!oldDNSForMessageProcessing) { + // No DNS layer present; nothing to do + return; + } + + // 2) Process each DNS section individually: question (qd), answer (an), authority (ns), additional (ar) + if (oldDNSForMessageProcessing->getQueryCount() > 0) { + headerPreprocessingMessageType(oldDNSForMessageProcessing, "qd"); + } + if (oldDNSForMessageProcessing->getAnswerCount() > 0) { + headerPreprocessingMessageType(oldDNSForMessageProcessing, "an"); + } + if (oldDNSForMessageProcessing->getAuthorityCount() > 0) { + headerPreprocessingMessageType(oldDNSForMessageProcessing, "ns"); + } + if (oldDNSForMessageProcessing->getAdditionalRecordCount() > 0) { + headerPreprocessingMessageType(oldDNSForMessageProcessing, "ar"); + } + + // 3) After processing individual records, replace the DNS header itself + pcpp::DnsLayer* oldDNS = Packet.getLayerOfType(); + pcpp::dnshdr* dnsHeader = oldDNS->getDnsHeader(); + + // Build a new CustomDNS header using fields from the old DNS header + std::unique_ptr customDns = std::make_unique(); + customDns->qr = dnsHeader->queryOrResponse; + customDns->opcode = static_cast(dnsHeader->opcode); + customDns->aa = dnsHeader->authoritativeAnswer; + customDns->tc = dnsHeader->truncation; + customDns->rd = dnsHeader->recursionDesired; + customDns->ra = dnsHeader->recursionAvailable; + customDns->z = dnsHeader->zero; + customDns->ad = dnsHeader->authenticData; + customDns->cd = dnsHeader->checkingDisabled; + customDns->rcode = static_cast(dnsHeader->responseCode); + customDns->qdCount = oldDNS->getQueryCount(); + customDns->anCount = oldDNS->getAnswerCount(); + customDns->nsCount = oldDNS->getAuthorityCount(); + customDns->arCount = oldDNS->getAdditionalRecordCount(); + + // Insert the new CustomDNS layer immediately before the old DNS layer + pcpp::Layer* prev = oldDNS->getPrevLayer(); + Packet.insertLayer(prev, customDns.release()); + + // Detach and delete the old DNS layer + Packet.detachLayer(oldDNS); + delete oldDNS; + + // Recompute checksums and lengths for all layers upstream of the new DNS header + Packet.computeCalculateFields(); + } + +private: + /** + * @brief Process DNS “question” or “resource-record” sections by inserting custom layers. + * + * @param origDns Pointer to the original pcpp::DnsLayer being processed. + * @param messageType One of: + * - "qd" : question section + * - "an" : answer section + * - "ns" : authority section + * - "ar" : additional section + * + * Workflow for "qd" (question): + * 1. 
Call origDns->getFirstQuery() to retrieve the first DnsQuery. + * 2. While query != nullptr: + * a. Create CustomDNSQR(name, type) using q->getName(), q->getDnsType(). + * b. Add it as a new layer: Packet.addLayer(qrLayer.release()). + * c. Recompute checksums/lengths (Packet.computeCalculateFields()). + * d. Move to the next query: origDns->getNextQuery(q). + * + * Workflow for "an"/"ns"/"ar" (resource records): + * 1. Depending on messageType, call: + * - getFirstAnswer() for "an" + * - getFirstAuthority() for "ns" + * - getFirstAdditionalRecord() for "ar" + * 2. While resource record != nullptr: + * a. Create CustomDNSRR(name, type, TTL) using r->getName(), r->getDnsType(), r->getTTL(). + * b. Add the custom RR layer: Packet.addLayer(rrLayer.release()). + * c. Recompute checksums/lengths (Packet.computeCalculateFields()). + * d. Advance to next record: + * • getNextAnswer(r) if messageType == "an" + * • getNextAuthority(r) if messageType == "ns" + * • getNextAdditionalRecord(r) if messageType == "ar" + */ + void headerPreprocessingMessageType(pcpp::DnsLayer* origDns, const std::string& messageType) { + if (messageType == "qd") { + // Process question section + pcpp::DnsQuery* q = origDns->getFirstQuery(); + while (q) { + // Insert a CustomDNSQR for each question name/type + std::unique_ptr qrLayer = std::make_unique(q->getName(), q->getDnsType()); + Packet.addLayer(qrLayer.release()); + Packet.computeCalculateFields(); // Update lengths/checksums after adding + + // Advance to next question + q = origDns->getNextQuery(q); + } + } + else { + // Process resource-record sections (answer, authority, additional) + pcpp::DnsResource* r = nullptr; + if (messageType == "an") { + r = origDns->getFirstAnswer(); + } else if (messageType == "ns") { + r = origDns->getFirstAuthority(); + } else { // messageType == "ar" + r = origDns->getFirstAdditionalRecord(); + } + while (r) { + // Insert a CustomDNSRR(name, type, TTL) for each resource record + std::unique_ptr rrLayer = std::make_unique( + r->getName(), + r->getDnsType(), + r->getTTL() + ); + Packet.addLayer(rrLayer.release()); + Packet.computeCalculateFields(); // Update lengths/checksums after adding + + // Advance to next record in the respective section + if (messageType == "an") { + r = origDns->getNextAnswer(r); + } else if (messageType == "ns") { + r = origDns->getNextAuthority(r); + } else { // "ar" + r = origDns->getNextAdditionalRecord(r); + } + } + } + } +}; \ No newline at end of file diff --git a/heiFIP/layers/http.cpp b/heiFIP/layers/http.cpp new file mode 100644 index 0000000..654bef3 --- /dev/null +++ b/heiFIP/layers/http.cpp @@ -0,0 +1,413 @@ +#pragma once + +#include +#include +#include +#include + +#include "transport.cpp" +#include "PcapPlusPlusVersion.h" +#include "HttpLayer.h" + +/** + * @class HTTPPacket + * @brief Base class for HTTP-specific packet handling, inheriting from TransportPacket. + * + * Responsibilities: + * - Inherits all Ethernet, IP, and transport-layer rewriting and hashing logic. + * - Provides a placeholder for HTTP-level preprocessing in derived classes. + */ +class HTTPPacket : public TransportPacket { +public: + /** + * @brief Constructor: delegates raw-packet ownership and layer maps to TransportPacket. + * + * @param rawPacketPointer unique_ptr to the raw pcpp::RawPacket containing all bytes. + * @param addressMapping Mapping of original→new MAC/IP addresses (populated previously). + * @param layerMap Map of protocol layers present (Ethernet, IP, TCP/UDP, HTTP). + * + * Workflow: + * 1. 
Calls TransportPacket’s constructor, which in turn: + * - Rewrites Ethernet MAC addresses (EtherPacket). + * - Rewrites IP addresses and computes IP-header hash (IPPacket). + * - Computes transport-layer hash and optionally strips payload (TransportPacket). + * 2. No extra HTTP-specific work is done here; derived classes override header_preprocessing(). + */ + HTTPPacket(std::unique_ptr rawPacketPointer, + std::unordered_map addressMapping = {}, + std::unordered_map layerMap = {}) + : TransportPacket(std::move(rawPacketPointer), addressMapping, layerMap) + { + } + + /** + * @brief Placeholder for HTTP-level header preprocessing. + * + * Derived classes (HTTPRequestPacket, HTTPResponsePacket) override this to: + * - Extract the existing HTTP layer (request or response). + * - Build a custom HTTP layer (CustomHTTPRequest or CustomHTTPResponse). + * - Copy relevant fields (method, URI, headers, status code). + * - Remove the original HttpRequestLayer/HttpResponseLayer and insert the custom one. + * - Recompute checksums/lengths after replacement. + * + * By default, simply calls TransportPacket’s header_preprocessing() to preserve earlier logic. + */ + void header_preprocessing() override { + TransportPacket::header_preprocessing(); + } +}; + +/** + * @class HTTPRequestPacket + * @brief Handles HTTP request–specific hashing and custom-layer replacement. + * + * Responsibilities: + * - Compute a request-specific SHA-256 hash based on URI, method, and Accept header. + * - Optionally strip raw payload if certain layers (TLS without TCP/UDP or Raw without HTTP) exist. + * - In header_preprocessing(), replace the pcpp::HttpRequestLayer with CustomHTTPRequest. + */ +class HTTPRequestPacket : public HTTPPacket { +public: + /// Stores the SHA-256 hex digest of request-specific fields + std::string hash; + + /** + * @brief Constructor: compute an HTTP-request hash and possibly strip payload. + * + * @param rawPacketPointer unique_ptr to the raw pcpp::RawPacket. + * @param addressMapping Inherited address-mapping from lower layers. + * @param layerMap Inherited layer-presence map (includes "HTTP" if request layer exists). + * + * Workflow: + * 1. Call HTTPPacket constructor (and thus all base-class logic). + * 2. Invoke generateHash(), which: + * - Retrieves the HttpRequestLayer (if present). + * - Extracts URI, HTTP method, and Accept header. + * - Builds a comma-separated string: ",,". + * - Computes SHA-256 over that string and stores it in `hash`. + * 3. Call removeRawPayloadIfPresent(), which: + * - If "Raw" is in layerMap, find the HttpRequestLayer. + * - Remove all layers after it (thus stripping any payload). + * - Recompute checksums/lengths. + */ + HTTPRequestPacket(std::unique_ptr rawPacketPointer, + std::unordered_map addressMapping = {}, + std::unordered_map layerMap = {}) + : HTTPPacket(std::move(rawPacketPointer), addressMapping, layerMap) + { + generateHash(); + removeRawPayloadIfPresent(); + } + + /** + * @brief Replace the original HttpRequestLayer with a CustomHTTPRequest, copying key fields. + * + * Workflow: + * 1. Call TransportPacket::header_preprocessing() to ensure all lower-layer logic has run. + * 2. Locate the existing HttpRequestLayer (Packet.getLayerOfType). + * 3. Create a new CustomHTTPRequest instance (allocates on heap via make_unique). + * 4. Copy: + * - Method (GET, POST, etc.) into fields[0]. + * - URI path into fields[1]. 
+ * - Common headers (User-Agent, Content-Type, Connection, Accept, Accept-Charset, + * Accept-Encoding, Cookie, TE) into corresponding indices of CustomHTTPRequest.fields. + * 5. Remove the original HTTPRequestLayer from Packet (Packet.removeLayer(HttpRequest)). + * 6. Add the custom layer (Packet.addLayer(customReq.release())). + * 7. Recompute checksums/lengths (Packet.computeCalculateFields()). + */ + void header_preprocessing() override { + // First, perform any transport-layer substitutions + HTTPPacket::header_preprocessing(); + + // Extract the original HTTP request layer + pcpp::HttpRequestLayer* origLayer = Packet.getLayerOfType(); + if (origLayer) { + // Allocate a new CustomHTTPRequest to hold rewritten header fields + std::unique_ptr customReq = std::make_unique(); + + // 1) Copy HTTP method (GET, POST, etc.) + std::string method = httpMethodEnumToString(origLayer->getFirstLine()->getMethod()); + customReq->fields[0].value = method; + + // 2) Copy request URI + std::string uri = origLayer->getFirstLine()->getUri(); + customReq->fields[1].value = uri; + + // 3) Copy common headers by name into designated indices + auto copyHeader = [&](const std::string& name, int idx) { + pcpp::HeaderField* fld = origLayer->getFieldByName(name); + if (fld) { + customReq->fields[idx].value = fld->getFieldValue(); + } + }; + copyHeader("User-Agent", 2); + copyHeader("Content-Type", 3); + copyHeader("Connection", 4); + copyHeader("Accept", 5); + copyHeader("Accept-Charset", 6); + copyHeader("Accept-Encoding", 7); + copyHeader("Cookie", 8); + copyHeader("TE", 9); + + // 4) Remove the original HTTP request layer + Packet.removeLayer(pcpp::HTTPRequest); + + // 5) Insert the CustomHTTPRequest layer into the packet + Packet.addLayer(customReq.release()); + + // 6) Recompute checksums and length fields for all affected headers + Packet.computeCalculateFields(); + } + + // No further HTTP-specific preprocessing; return to caller + } + +private: + /** + * @brief Build a SHA-256 hash from selected HTTP request fields (URI, method, Accept). + * + * Steps: + * 1. Locate the HttpRequestLayer via getLayerOfType(). + * 2. Extract the URI path, HTTP method enum converted to string, and the Accept header (if present). + * 3. Concatenate them: ",,". + * 4. Call SHA256(input.c_str(), length, digest), then hex-encode digest into `hash`. + * + * Why: + * - Create a concise fingerprint of the request’s most important identifying fields. + */ + void generateHash() { + pcpp::HttpRequestLayer* httpRequestLayer = Packet.getLayerOfType(); + if (httpRequestLayer != nullptr) { + std::string path = httpRequestLayer->getFirstLine()->getUri(); + std::string method = httpMethodEnumToString(httpRequestLayer->getFirstLine()->getMethod()); + std::string accept = ""; + if (auto* fld = httpRequestLayer->getFieldByName("Accept")) { + accept = fld->getFieldValue(); + } + + std::string input = path + "," + method + "," + accept; + unsigned char digest[SHA256_DIGEST_LENGTH]; + SHA256(reinterpret_cast(input.c_str()), input.size(), digest); + + std::ostringstream oss; + oss << std::hex << std::setw(2) << std::setfill('0'); + for (int i = 0; i < SHA256_DIGEST_LENGTH; ++i) { + oss << (int)digest[i]; + } + hash = oss.str(); + } + } + + /** + * @brief Convert an HttpMethod enum to its string representation (e.g., HttpGET → "GET"). + * + * @param method The HttpMethod enum value from PcapPlusPlus. + * @return std::string Uppercase string of the HTTP method name, or "UNKNOWN" if not recognized. 
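One detail worth noting in generateHash() above: std::setw(2) is applied once before the loop, so only the first digest byte is zero-padded and the resulting hex string can come out shorter than 64 characters; the same pattern appears in the HTTP-response and transport-layer hash loops further down. A minimal helper along the following lines (sha256Hex is a hypothetical name, not part of heiFIP) keeps the padding per byte.

#include <iomanip>
#include <openssl/sha.h>
#include <sstream>
#include <string>

// Hypothetical helper: SHA-256 a string and hex-encode it with per-byte zero padding.
static std::string sha256Hex(const std::string& input) {
    unsigned char digest[SHA256_DIGEST_LENGTH];
    SHA256(reinterpret_cast<const unsigned char*>(input.c_str()), input.size(), digest);

    std::ostringstream oss;
    oss << std::hex << std::setfill('0');
    for (int i = 0; i < SHA256_DIGEST_LENGTH; ++i) {
        oss << std::setw(2) << static_cast<int>(digest[i]); // setw() resets after every insertion
    }
    return oss.str();
}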
+ */ + std::string httpMethodEnumToString(pcpp::HttpRequestLayer::HttpMethod method) { + switch (method) { + case pcpp::HttpRequestLayer::HttpMethod::HttpGET: return "GET"; + case pcpp::HttpRequestLayer::HttpMethod::HttpHEAD: return "HEAD"; + case pcpp::HttpRequestLayer::HttpMethod::HttpPOST: return "POST"; + case pcpp::HttpRequestLayer::HttpMethod::HttpPUT: return "PUT"; + case pcpp::HttpRequestLayer::HttpMethod::HttpDELETE: return "DELETE"; + case pcpp::HttpRequestLayer::HttpMethod::HttpTRACE: return "TRACE"; + case pcpp::HttpRequestLayer::HttpMethod::HttpOPTIONS: return "OPTIONS"; + case pcpp::HttpRequestLayer::HttpMethod::HttpCONNECT: return "CONNECT"; + case pcpp::HttpRequestLayer::HttpMethod::HttpPATCH: return "PATCH"; + default: return "UNKNOWN"; + } + } + + /** + * @brief If a "Raw" payload layer is present (and no higher protocol like HTTP is acting on it), + * strip all layers that follow the HttpRequestLayer. + * + * Workflow: + * 1. Check if "Raw" is present in layer_map. + * 2. Locate the HttpRequestLayer. + * 3. Call Packet.removeAllLayersAfter(httpRequestLayer) to drop downstream payload layers. + * 4. Recompute checksums/lengths. + * + * Why: + * - When an HTTP request is encapsulated in TLS or other layers, the raw payload is not + * relevant for higher-level analysis; stripping it prevents extraneous data from persisting. + */ + void removeRawPayloadIfPresent() { + if (layer_map.find("Raw") != layer_map.end()) { + pcpp::HttpRequestLayer* httpRequestLayer = Packet.getLayerOfType(); + if (httpRequestLayer != nullptr) { + Packet.removeAllLayersAfter(httpRequestLayer); + } + Packet.computeCalculateFields(); + } + } +}; + +/** + * @class HTTPResponsePacket + * @brief Handles HTTP response–specific hashing and custom-layer replacement. + * + * Responsibilities: + * - Compute a response-specific SHA-256 hash based on Server, status code, and Connection header. + * - Optionally strip raw payload if a "Raw" layer is present. + * - In header_preprocessing(), replace pcpp::HttpResponseLayer with CustomHTTPResponse. + */ +class HTTPResponsePacket : public HTTPPacket { +public: + /// Stores the SHA-256 hex digest of response-specific fields + std::string hash; + + /** + * @brief Constructor: compute an HTTP-response hash and possibly strip payload. + * + * @param rawPacketPointer unique_ptr to the raw pcpp::RawPacket. + * @param addressMapping Inherited address-mapping from lower layers. + * @param layerMap Inherited layer-presence map (includes "HTTP" if response layer exists). + * + * Workflow: + * 1. Call HTTPPacket constructor (and all base-class logic). + * 2. generateHash(): + * - Locate HttpResponseLayer. + * - Extract Server header (if present), status code, and Connection header. + * - Build input string ",,". + * - Compute SHA-256 over that string and store in `hash`. + * 3. removeHttpPayloadIfPresent(): + * - If "Raw" is in layerMap, locate HttpResponseLayer. + * - Call Packet.removeAllLayersAfter(httpResponseLayer), stripping payload. + * - Recompute checksums/lengths. + */ + HTTPResponsePacket(std::unique_ptr rawPacketPointer, + std::unordered_map addressMapping = {}, + std::unordered_map layerMap = {}) + : HTTPPacket(std::move(rawPacketPointer), addressMapping, layerMap) + { + generateHash(); + removeHttpPayloadIfPresent(); + } + + /** + * @brief Replace the original HttpResponseLayer with a CustomHTTPResponse, copying key fields. + * + * Workflow: + * 1. Call TransportPacket/HTTPPacket header_preprocessing to ensure lower-layer logic is done. + * 2. 
Locate the existing HttpResponseLayer (Packet.getLayerOfType). + * 3. Allocate a new CustomHTTPResponse. + * 4. Copy: + * - Status code from firstLine into fields[0]. + * - Common headers (Connection, Content-Encoding, Content-Type, Server, Set-Cookie, Transfer-Encoding) + * into designated indices of CustomHTTPResponse.fields. + * 5. Remove the original HTTPResponseLayer (Packet.removeLayer(HTTPResponse)). + * 6. Add the custom layer (Packet.addLayer(customResp)), then recompute checksums/lengths. + */ + void header_preprocessing() override { + // First, perform any transport-layer and IP/Ethernet substitutions + HTTPPacket::header_preprocessing(); + + // Extract the original HTTP response layer + pcpp::HttpResponseLayer* origLayer = Packet.getLayerOfType(); + if (origLayer) { + // Allocate a new CustomHTTPResponse to hold rewritten header fields + CustomHTTPResponse* customResp = new CustomHTTPResponse(); + + // 1) Copy status code from the first line + auto* firstLine = origLayer->getFirstLine(); + std::string status = firstLine ? std::to_string(firstLine->getStatusCode()) : std::string(); + customResp->fields[0].value = status; + + // 2) Copy common response headers by name into designated indices + auto copyHeader = [&](const std::string& name, int idx) { + pcpp::HeaderField* fld = origLayer->getFieldByName(name); + if (fld) { + customResp->fields[idx].value = fld->getFieldValue(); + } + }; + copyHeader("Connection", 1); + copyHeader("Content-Encoding", 2); + copyHeader("Content-Type", 3); + copyHeader("Server", 4); + copyHeader("Set-Cookie", 5); + copyHeader("Transfer-Encoding", 6); + + // 3) Remove the original HTTP response layer + Packet.removeLayer(pcpp::HTTPResponse); + + // 4) Insert the CustomHTTPResponse layer + Packet.addLayer(customResp); + + // 5) Recompute checksums and lengths for all affected headers + Packet.computeCalculateFields(); + } + } + +private: + /** + * @brief Build a SHA-256 hash from selected HTTP response fields (Server, status code, Connection). + * + * Steps: + * 1. Locate the HttpResponseLayer via getLayerOfType(). + * 2. Extract the "Server" header value (if present), the status code from firstLine, and the "Connection" header. + * 3. Concatenate them: ",,". + * 4. Compute SHA-256 over that string and store in `hash`. + * + * Why: + * - Create a unique fingerprint of the response’s core identifying fields for indexing or deduplication. + */ + void generateHash() { + pcpp::HttpResponseLayer* respLayer = Packet.getLayerOfType(); + if (!respLayer) { + return; + } + + // Extract "Server" header (if it exists) + std::string server; + if (auto* fld = respLayer->getFieldByName("Server")) { + server = fld->getFieldValue(); + } + + // Extract status code from the first line + auto* firstLine = respLayer->getFirstLine(); + std::string statusCode = firstLine ? 
std::to_string(firstLine->getStatusCode()) : std::string(); + + // Extract "Connection" header (if it exists) + std::string connection; + if (auto* fld = respLayer->getFieldByName("Connection")) { + connection = fld->getFieldValue(); + } + + // Build the input string and compute SHA-256 + std::string input = server + "," + statusCode + "," + connection; + unsigned char digest[SHA256_DIGEST_LENGTH]; + SHA256(reinterpret_cast(input.c_str()), input.size(), digest); + + std::ostringstream oss; + oss << std::hex << std::setw(2) << std::setfill('0'); + for (int i = 0; i < SHA256_DIGEST_LENGTH; ++i) { + oss << static_cast(digest[i]); + } + hash = oss.str(); + } + + /** + * @brief If a "Raw" payload layer is present, strip all layers that follow the HttpResponseLayer. + * + * Workflow: + * 1. Check if "Raw" is in layer_map. + * 2. Locate the HttpResponseLayer via getLayerOfType(). + * 3. Call Packet.removeAllLayersAfter(httpResponseLayer) to drop downstream payload. + * 4. Recompute checksums/lengths (Packet.computeCalculateFields()). + * + * Why: + * - When an HTTP response is encapsulated in TLS or another higher-layer format, the raw data + * is not needed for HTTP-layer analysis; removing it prevents irrelevant bytes from persisting. + */ + void removeHttpPayloadIfPresent() { + if (layer_map.find("Raw") != layer_map.end()) { + pcpp::HttpResponseLayer* httpResponseLayer = Packet.getLayerOfType(); + if (httpResponseLayer != nullptr) { + Packet.removeAllLayersAfter(httpResponseLayer); + } + Packet.computeCalculateFields(); + } + } +}; \ No newline at end of file diff --git a/heiFIP/layers/init.cpp b/heiFIP/layers/init.cpp new file mode 100644 index 0000000..6894062 --- /dev/null +++ b/heiFIP/layers/init.cpp @@ -0,0 +1,195 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include "packet.cpp" +#include "dns.cpp" +#include "http.cpp" +#include "ip.cpp" +#include "ssh.cpp" +#include "transport.cpp" + +enum class SupportedHeaderType { + IP, + IPv6, + DNS, + HTTP_REQUEST, + HTTP_RESPONSE, + TCP, + UDP +}; + +/** + * List of supported headers in processing order. + */ +static const std::vector SUPPORTED_HEADERS = { + SupportedHeaderType::IP, + SupportedHeaderType::IPv6, + SupportedHeaderType::DNS, + SupportedHeaderType::HTTP_REQUEST, + SupportedHeaderType::HTTP_RESPONSE, + SupportedHeaderType::TCP, + SupportedHeaderType::UDP +}; + +/** + * PacketProcessorType indicates whether processing is active, + * and at what level (NONE or HEADER). + */ +enum class PacketProcessorType { + NONE = 1, + HEADER = 2 +}; + +/** + * PacketProcessor orchestrates per-packet handling across supported protocols, + * accumulates packets, and returns owning unique pointers to them. + */ +class PacketProcessor { + public: + /** + * Constructor initializes internal state. + * @param fileExtension expected file extension (e.g., "pcap" or "pcapng"). + */ + PacketProcessor(const std::string& fileExtension = "pcap") + : fileExtension(fileExtension) + { + // TLS session layer is available via PcapPlusPlus's TLSSessionLayer plugin. + // No explicit runtime load is required—just link against the TLS parsing library. + } + + /** + * Add a parsed packet to the internal buffer to be written later. + */ + void addPacket(pcpp::Packet* packet) { + packets.push_back(packet); + } + + /** + * Read packets from a PCAP file, preprocess them, and return FIPPacket pointers. 
+ * @param filename path to the PCAP file + * @param type preprocessing type (NONE or HEADER) + * @param maxCount maximum number of packets to read (default 64) + */ + std::vector> readPacketsFile(const std::string& filename, PacketProcessorType type, + bool removeDuplicates = false, size_t maxCount = 64) { + + std::vector> result; + pcpp::PcapFileReaderDevice reader(filename); + + if (!reader.open()) { + return result; + } + + pcpp::RawPacket rawPacket; + std::unique_ptr rawPacketPt; + size_t count = 0; + + while (count < maxCount && reader.getNextPacket(rawPacket)) { + rawPacketPt = std::make_unique(rawPacket); + std::unique_ptr fippkt = preprocess(rawPacketPt, type); + + if (fippkt && !fippkt->getHash().empty()) { + auto res = hashDict.insert(fippkt->getHash()); + if (res.second) { // was inserted, new + result.push_back(std::move(fippkt)); + } else { + // This case occurs if two packets are the same (have the same hash value) + // which results in the packet not being used if remove_duplicates is set + if (!removeDuplicates) { + result.push_back(std::move(fippkt)); + } else { + std::cout << "[-] Warning: Duplicate packet with hash value " << fippkt->getHash() << " removed" << std::endl; + } + } + } else if (fippkt) { + result.push_back(std::move(fippkt)); + } + + ++count; + } + reader.close(); + return result; + } + + std::vector> readPacketsList(std::vector>& inputPackets, + PacketProcessorType type, bool removeDuplicates = false) { + std::vector> result; + for (std::unique_ptr& pktPtr : inputPackets) { + std::unique_ptr fippkt = preprocess(pktPtr, type); + if (!fippkt) {continue;} + + if (!fippkt->getHash().empty()) { + auto res = hashDict.insert(fippkt->getHash()); + if (res.second) { + result.push_back(std::move(fippkt)); + } else { + // This case occurs if two packets are the same (have the same hash value) + // which results in the packet not being used if remove_duplicates is set + if (!removeDuplicates) { + result.push_back(std::move(fippkt)); + } else { + std::cout << "[-] Warning: Duplicate packet with hash value " << fippkt->getHash() << " removed" << std::endl; + } + } + } else { + result.push_back(std::move(fippkt)); + } + } + return result; + } + + // TODO: Add methods to process packets by type + + private: + std::string fileExtension; + std::unordered_set hashDict; + std::vector packets; // Stored packets to write out + + /** + * Pre-process a raw pcpp::Packet into a FIPPacket subclass based on layers. + * Optionally invoke header preprocessing. 
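For orientation, a hedged usage sketch of the reader API defined above; PacketProcessor and PacketProcessorType come from this file, while processOneCapture and "capture.pcap" are placeholders.

#include <iostream>

// Placeholder driver, assuming init.cpp is included: read up to 64 packets,
// run header preprocessing, drop hash duplicates, and print the remaining hashes.
void processOneCapture() {
    PacketProcessor processor("pcap");
    auto packets = processor.readPacketsFile("capture.pcap", PacketProcessorType::HEADER,
                                             /*removeDuplicates=*/true, /*maxCount=*/64);
    for (const auto& pkt : packets) {
        std::cout << pkt->getHash() << std::endl;
    }
}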
+ */ + std::unique_ptr preprocess(std::unique_ptr& packet, PacketProcessorType type) { + std::unique_ptr fippacket = std::make_unique(std::make_unique(*packet)); + std::unordered_map address_mapping = fippacket->getAdressMapping(); + std::unordered_map layer_map = fippacket->getLayerMap(); + // HTTP handling + if (layer_map.count("HTTP")) { + fippacket = std::make_unique(std::move(packet), address_mapping, layer_map); + } + else if (layer_map.count("HTTPRequest")) { + fippacket = std::make_unique(std::move(packet), address_mapping, layer_map); + } + else if (layer_map.count("HTTPResponse")) { + fippacket = std::make_unique(std::move(packet), address_mapping, layer_map); + } + // DNS handling + else if (layer_map.count("DNS")) { + fippacket = std::make_unique(std::move(packet), address_mapping, layer_map); + } + // Transport layer (TCP/UDP) + else if (layer_map.count("TCP") || layer_map.count("UDP")) { + fippacket = std::make_unique(std::move(packet), address_mapping, layer_map); + } + // Network layer (IPv4/IPv6) + else if (layer_map.count("IPv4") || layer_map.count("IPv6")) { + fippacket = std::make_unique(std::move(packet), address_mapping, layer_map); + } + // Data link layer (Ethernet) + else if (layer_map.count("Ethernet")) { + fippacket = std::make_unique(std::move(packet), address_mapping, layer_map); + } + + // Header preprocessing if requested + if (type == PacketProcessorType::HEADER) { + fippacket->header_preprocessing(); + } + return fippacket; + } +}; \ No newline at end of file diff --git a/heiFIP/layers/ip.cpp b/heiFIP/layers/ip.cpp new file mode 100644 index 0000000..5f47b62 --- /dev/null +++ b/heiFIP/layers/ip.cpp @@ -0,0 +1,372 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // For SHA-256 hashing of header fields + +#include "packet.cpp" +#include "header.cpp" + +/** + * @class IPPacket + * @brief Extends EtherPacket to handle IPv4/IPv6 header rewriting and hashing. + * + * Responsibilities: + * - Upon construction, detect and rewrite IP addresses (IPv4 or IPv6). + * - Compute a hash based on selected IP header fields (version, fragment/traffic, protocol/hopLimit). + * - Optionally strip payload when certain layers (e.g., TLS without TCP/UDP) are present. + * - Perform header preprocessing by substituting standard IP layers with custom IPLayer instances. + * + * Inherits from EtherPacket, so Ethernet MAC rewriting occurs first (if present). + */ +class IPPacket : public EtherPacket { +public: + /** + * @brief Constructor: Rewrites IP addresses, computes a header-based hash, and optionally strips payload. + * + * @param rawPacketPointer Owned unique_ptr to pcpp::RawPacket containing raw bytes. + * @param addressMapping Initial IP address mapping (original → new). If empty, new addresses generated. + * @param layerMap Optional precomputed layer map (protocol→presence). If empty, base class extracted layers. + * + * Workflow: + * 1. Delegate to EtherPacket constructor, which handles: + * - MAC rewriting (if “Ethernet” present). + * - Building layer_map and initial SHA-256 of entire packet layers. + * 2. If “IPv4” is present in layer_map, call filterIPv4() to rewrite src/dst IPs: + * • mapAddress() → returns existing mapping or generates a random address. + * • Set new IPv4 addresses in the IPv4Layer. + * 3. Extract key IPv4 header fields (version, fragmentOffset, protocol) into a comma-separated string. + * 4. 
Call computeHash() on that string to override hash with IPv4-specific header hash. + * 5. If “TLS” exists without “TCP”/“UDP”, strip the IPv4 payload (set first payload byte to ‘\0’). + * 6. If “Raw” exists without higher-level “TCP”/“UDP”/“HTTP”, also strip payload similarly. + * 7. If “IPv6” is present instead of IPv4, perform analogous steps: + * • filterIPv6() rewrites src/dst IPv6 addresses. + * • Extract IPv6 header fields (version, trafficClass, hopLimit) for computeHash(). + * • Conditionally strip payload if “TLS” without transport or “Raw” without transport/HTTP. + */ + IPPacket(std::unique_ptr rawPacketPointer, + std::unordered_map addressMapping = {}, + std::unordered_map layerMap = {}) + : EtherPacket(std::move(rawPacketPointer), addressMapping, layerMap) + { + // If IPv4 layer is present, perform IPv4-specific rewriting and hashing + if (layerMap.count("IPv4")) { + filterIPv4(); // Rewrite source/dest IPv4 addresses based on address_mapping + + // Retrieve the IPv4Layer to extract header fields + auto ipLayer = Packet.getLayerOfType(); + // Build a comma-separated string of (version, fragmentOffset, protocol) + std::string hashInput = + std::to_string(ipLayer->getIPv4Header()->ipVersion) + "," + + std::to_string(ipLayer->getIPv4Header()->fragmentOffset) + "," + + std::to_string(ipLayer->getIPv4Header()->protocol); + computeHash(hashInput); // Compute SHA-256 over this header-specific string + + // If TLS is present but no TCP/UDP, strip the payload to sanitize + if (layerMap.count("TLS") && !(layerMap.count("TCP") || layerMap.count("UDP"))) { + ipLayer->getLayerPayload()[0] = '\0'; + } + // If Raw payload is present without transport or HTTP, strip it as well + if (layerMap.count("Raw") && !(layerMap.count("TCP") || layerMap.count("UDP") || layerMap.count("HTTP"))) { + ipLayer->getLayerPayload()[0] = '\0'; + } + } + // If IPv6 layer is present instead of IPv4, perform analogous steps + else if (layerMap.count("IPv6")) { + filterIPv6(); // Rewrite source/dest IPv6 addresses + + // Retrieve the IPv6Layer to extract header fields + auto ip6Layer = Packet.getLayerOfType(); + // Build a comma-separated string of (version, trafficClass, hopLimit) + std::string hashInput = + std::to_string(ip6Layer->getIPv6Header()->ipVersion) + "," + + std::to_string(ip6Layer->getIPv6Header()->trafficClass) + "," + + std::to_string(ip6Layer->getIPv6Header()->hopLimit); + computeHash(hashInput); // Compute SHA-256 over this IPv6-specific string + + // Strip payload when TLS present without transport + if (layerMap.count("TLS") && !(layerMap.count("TCP") || layerMap.count("UDP"))) { + ip6Layer->getLayerPayload()[0] = '\0'; + } + // Strip Raw payload if no transport or HTTP + if (layerMap.count("Raw") && !(layerMap.count("TCP") || layerMap.count("UDP") || layerMap.count("HTTP"))) { + ip6Layer->getLayerPayload()[0] = '\0'; + } + } + } + + /** + * @brief Override header_preprocessing to replace standard IP layers with custom IPLayer types. + * + * Reasons: + * - Certain analysis or transformations require a custom representation (CustomIPLayer / CustomIPv6Layer). + * - After detaching the original layer, recompute checksums to keep the packet consistent. + * + * Workflow (IPv4 case): + * 1. Find the existing IPv4Layer using Packet.getLayerOfType(). If none, return. + * 2. Store a pointer to the layer’s previous layer (prev). + * 3. Detach the original IPv4Layer from Packet (Packet.detachLayer(oldIp)). + * 4. Call header_preprocessing_ipv4(oldIp) to build a CustomIPLayer instance from old header fields. 
+ * 5. Delete the old IPv4Layer object to free memory. + * 6. Insert the new CustomIPLayer after prev (Packet.insertLayer(prev, customLayer.release(), true)): + * - The 'true' flag tells PcapPlusPlus to update layer pointers automatically. + * 7. Recompute upper-layer length and checksum fields (Packet.computeCalculateFields()). + * + * The IPv6 case is analogous, substituting IPv6Layer with CustomIPv6Layer. + * Finally, call EtherPacket::header_preprocessing() to allow any Ethernet‐level adjustments. + */ + void header_preprocessing() override { + // IPv4 replacement logic + if (layer_map.count("IPv4")) { + pcpp::IPv4Layer* oldIp = Packet.getLayerOfType(); + if (!oldIp) return; // No IPv4 layer found—nothing to replace + + // Remember the layer that preceded the IPv4 layer + pcpp::Layer* prev = oldIp->getPrevLayer(); + // Detach the old IPv4 layer from the packet’s layer chain + Packet.detachLayer(oldIp); + + // Build a CustomIPLayer from the old header fields + std::unique_ptr customLayer = header_preprocessing_ipv4(oldIp); + delete oldIp; // Free the original layer’s memory + + // Insert the custom IPv4 layer into the same position + Packet.insertLayer(prev, customLayer.release(), true); + + // Recompute checksums/lengths for all upstream layers + Packet.computeCalculateFields(); + } + + // IPv6 replacement logic + if (layer_map.count("IPv6")) { + pcpp::IPv6Layer* oldIp = Packet.getLayerOfType(); + if (!oldIp) return; // No IPv6 layer found + + pcpp::Layer* prev = oldIp->getPrevLayer(); + // Build a CustomIPv6Layer from the old header fields + std::unique_ptr customLayer = header_preprocessing_ipv6(oldIp); + + // Insert the new layer before detaching the old one (to preserve layer pointers) + Packet.insertLayer(prev, customLayer.release()); + + // Now detach and delete the old IPv6 layer + Packet.detachLayer(oldIp); + delete oldIp; + + // Recompute length/checksum fields after substitution + Packet.computeCalculateFields(); + } + + // Delegate to EtherPacket for any Ethernet‐level preprocessing + EtherPacket::header_preprocessing(); + } + + /** + * @brief Build a CustomIPLayer from an existing IPv4Layer’s header fields. + * + * @param ipLayer Pointer to the original IPv4Layer being replaced. + * @return std::unique_ptr New layer capturing version, flags, TOS, TTL, protocol. + * + * Steps: + * 1. Extract pointers to the IPv4 header (ipVersion, fragmentOffset, protocol, etc.). + * 2. Compute fragment flags by shifting and masking the fragmentOffset field. + * 3. Construct a CustomIPLayer with (version, flags, typeOfService, timeToLive, protocol). + * + * Why: + * - CustomIPLayer can enforce custom behaviors (e.g., anonymization, logging) + * without modifying the original pcpp::IPv4Layer class. + */ + std::unique_ptr header_preprocessing_ipv4(pcpp::IPv4Layer* ipLayer) { + pcpp::iphdr* hdr = ipLayer->getIPv4Header(); + uint8_t version = hdr->ipVersion; + // Network-byte-order fragmentOffset: mask off the top 3 bits for flags + uint16_t fragOffset = ntohs(hdr->fragmentOffset); + uint8_t flags = static_cast((fragOffset >> 13) & 0x07); + + // Build the custom IPv4 layer with key header fields + return std::make_unique( + version, + flags, + hdr->typeOfService, + hdr->timeToLive, + hdr->protocol + ); + } + + /** + * @brief Build a CustomIPv6Layer from an existing IPv6Layer’s header fields. + * + * @param ipv6Layer Pointer to the original IPv6Layer being replaced. + * @return std::unique_ptr New layer capturing version, trafficClass, nextHeader, hopLimit. 
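The shift-and-mask in header_preprocessing_ipv4() above is easiest to see with a concrete value; the sketch below checks the Don't-Fragment case (0x4000 on the wire).

#include <arpa/inet.h>
#include <cassert>
#include <cstdint>

// Worked example: the IPv4 flags occupy the top 3 bits of the 16-bit fragment field,
// so DF (0x4000) becomes flags == 0b010 and the 13-bit offset stays 0.
void fragmentFlagsExample() {
    uint16_t fragFieldNet = htons(0x4000);                            // as stored in the header
    uint16_t fragHost     = ntohs(fragFieldNet);                      // back to host byte order
    uint8_t  flags        = static_cast<uint8_t>((fragHost >> 13) & 0x07);
    uint16_t offset       = fragHost & 0x1FFF;
    assert(flags == 0x02 && offset == 0);
}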
+ * + * Why: + * - Similar to IPv4 case: isolate key fields for custom processing (e.g., anonymization or analysis). + */ + std::unique_ptr header_preprocessing_ipv6(pcpp::IPv6Layer* ipv6Layer) { + uint8_t ipVersion = ipv6Layer->getIPv6Header()->ipVersion; + uint8_t trafficClass = ipv6Layer->getIPv6Header()->trafficClass; + uint8_t nextHeader = ipv6Layer->getIPv6Header()->nextHeader; + uint8_t hopLimit = ipv6Layer->getIPv6Header()->hopLimit; + + return std::make_unique( + ipVersion, + trafficClass, + nextHeader, + hopLimit + ); + } + +private: + /// Stores the hash computed in the constructor or computeHash() + std::string hash; + + /** + * @brief Compute a SHA-256 hash over a short input string and store it in `hash`. + * + * @param input A comma-separated string of selected header fields (e.g., “4,0,6” for IPv4 version=4, fragOffset=0, proto=6). + * + * Workflow: + * 1. Call SHA256(input.c_str(), input.length(), resultBuffer). + * 2. Convert the 32-byte digest (256 bits) into a lowercase hex string. + * 3. Store the hex string in `hash`. + * + * Why: + * - Provide a concise, reproducible fingerprint of core IP header fields for deduplication or indexing. + */ + void computeHash(const std::string& input) { + unsigned char result[SHA256_DIGEST_LENGTH]; + SHA256(reinterpret_cast(input.c_str()), input.length(), result); + + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + for (int i = 0; i < SHA256_DIGEST_LENGTH; ++i) { + oss << std::setw(2) << static_cast(result[i]); + } + hash = oss.str(); + } + + /** + * @brief Rewrite IPv4 source and destination addresses based on address_mapping. + * + * Steps: + * 1. Retrieve the IPv4Layer pointer from Packet. + * 2. Extract original src and dst IPs as std::string. + * 3. Call mapAddress() for each; if not present, generateRandomIPv4(). + * 4. Set the new IPv4 addresses on the IPv4Layer. + * + * Why: + * - Anonymize or normalize IP addresses consistently across packets (e.g., for privacy). + */ + void filterIPv4() { + auto ipLayer = Packet.getLayerOfType(); + std::string src = ipLayer->getSrcIPAddress().toString(); + std::string dst = ipLayer->getDstIPAddress().toString(); + + std::string newSrc = mapAddress(src); + std::string newDst = mapAddress(dst); + + ipLayer->setSrcIPv4Address(pcpp::IPv4Address(newSrc)); + ipLayer->setDstIPv4Address(pcpp::IPv4Address(newDst)); + } + + /** + * @brief Rewrite IPv6 source and destination addresses based on address_mapping. + * + * Steps: + * 1. Retrieve the IPv6Layer pointer from Packet. + * 2. Extract original src and dst IPv6 as std::string. + * 3. Call mapAddress(src, true) / mapAddress(dst, true); generateRandomIPv6() if missing. + * 4. Set the new IPv6 addresses on the IPv6Layer. + * + * Why: + * - Consistently anonymize or remap IPv6 addresses across packets. + */ + void filterIPv6() { + auto ip6Layer = Packet.getLayerOfType(); + std::string src = ip6Layer->getSrcIPAddress().toString(); + std::string dst = ip6Layer->getDstIPAddress().toString(); + + std::string newSrc = mapAddress(src, true); + std::string newDst = mapAddress(dst, true); + + ip6Layer->setSrcIPv6Address(pcpp::IPv6Address(newSrc)); + ip6Layer->setDstIPv6Address(pcpp::IPv6Address(newDst)); + } + + /** + * @brief Return a new or existing mapping for an IP address string. + * + * @param oldAddr The original IP address string (IPv4 or IPv6). + * @param isIPv6 If true, call generateRandomIPv6(); otherwise generateRandomIPv4(). + * @return std::string The mapped or newly generated IP address. 
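The contract of mapAddress() matters more than its implementation: the first lookup of an address fixes its replacement for the rest of the run. A self-contained sketch of that behaviour (mapOrGenerate is an illustrative stand-in, not heiFIP code):

#include <cassert>
#include <string>
#include <unordered_map>

// Stand-in for mapAddress(): return the stored replacement if one exists,
// otherwise remember the supplied candidate and return it.
std::string mapOrGenerate(std::unordered_map<std::string, std::string>& mapping,
                          const std::string& oldAddr, const std::string& candidate) {
    auto it = mapping.find(oldAddr);
    if (it != mapping.end()) return it->second;
    mapping[oldAddr] = candidate;          // in ip.cpp the candidate is a random address
    return candidate;
}

void mappingIsStable() {
    std::unordered_map<std::string, std::string> mapping;
    std::string first  = mapOrGenerate(mapping, "192.0.2.1", "10.0.0.1");
    std::string second = mapOrGenerate(mapping, "192.0.2.1", "10.9.9.9");
    assert(first == second);               // the second candidate is ignored
}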
+ * + * Logic: + * - If oldAddr exists in address_mapping, return the stored value. + * - Otherwise, generate a random address (IPv4 or IPv6), store it in mapping, and return it. + * + * Why: + * - Ensure consistent rewriting: future packets with the same original address map to the same new one. + */ + std::string mapAddress(const std::string& oldAddr, bool isIPv6 = false) { + if (address_mapping.count(oldAddr)) { + return address_mapping[oldAddr]; + } + std::string newAddr = isIPv6 ? generateRandomIPv6() : generateRandomIPv4(); + address_mapping[oldAddr] = newAddr; + return newAddr; + } + + /** + * @brief Generate a random IPv4 address in dotted-decimal format. + * + * Steps: + * 1. Seed the RNG using current time (std::time(nullptr)). + * 2. Generate four octets (0–255) and join with dots. + * + * Why: + * - Provide a pseudorandom anonymized IPv4 when no mapping exists. + */ + std::string generateRandomIPv4() { + std::srand(static_cast(std::time(nullptr))); + std::string ip; + for (int i = 0; i < 4; ++i) { + int octet = std::rand() % 256; + ip += std::to_string(octet); + if (i < 3) ip += "."; + } + return ip; + } + + /** + * @brief Generate a random IPv6 address in standard colon-hex format. + * + * Steps: + * 1. Seed the RNG using current time. + * 2. Generate eight 16-bit blocks (0–65535) and format each as four-digit hex. + * 3. Join blocks with colons. + * + * Why: + * - Provide a pseudorandom anonymized IPv6 when no mapping exists. + */ + std::string generateRandomIPv6() { + std::srand(static_cast(std::time(nullptr))); + std::ostringstream oss; + for (int i = 0; i < 8; ++i) { + int block = std::rand() % 0x10000; + oss << std::hex << std::setw(4) << std::setfill('0') << block; + if (i < 7) oss << ":"; + } + return oss.str(); + } +}; \ No newline at end of file diff --git a/heiFIP/layers/packet.cpp b/heiFIP/layers/packet.cpp new file mode 100644 index 0000000..cd85cac --- /dev/null +++ b/heiFIP/layers/packet.cpp @@ -0,0 +1,374 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // Using SHA-256 for hashing layer bytes +#include +#include + +/** + * @class FIPPacket + * @brief Base class for wrapping a pcpp::RawPacket and extracting features. + * + * Responsibilities: + * - Store a unique_ptr to a RawPacket (pcpp::RawPacket). + * - Build a pcpp::Packet from the RawPacket for easy layer access. + * - Generate a SHA-256 hash of all protocol-layer bytes. + * - Build a map of which protocol layers are present. + * - Maintain an address_mapping for rewriting addresses (e.g., MAC or IP). + */ +class FIPPacket { +private: + /// Unique pointer that owns the raw packet bytes (pcpp::RawPacket). + std::unique_ptr rawPtr; + +protected: + /// Parsed packet object created from rawPtr for layer-level manipulations. + pcpp::Packet Packet; + + /** + * @brief Maps original address strings (e.g., MAC/IP) to rewritten addresses. + * + * Used by derived classes (e.g., EtherPacket) to replace source/dest addresses consistently. + * Key: original address string. Value: new (random or mapped) address string. + */ + std::unordered_map address_mapping; + + /** + * @brief Indicates which protocol layers are present in this packet. + * + * Key: protocol name (e.g., "Ethernet", "IPv4", "TCP"). + * Value: true if that layer is present, false otherwise (currently always true for present layers). + */ + std::unordered_map layer_map; + + /// Hex-encoded SHA-256 hash of all concatenated layer bytes. 
+ std::string hash; + + /** + * @brief Concatenates all layer data and computes a SHA-256 hash. + * + * Steps: + * 1. Iterate through each layer in Packet (using getFirstLayer()/getNextLayer()). + * 2. Write raw bytes of each layer into a std::ostringstream. + * 3. Compute SHA-256 over the concatenated byte buffer. + * 4. Convert the resulting digest into a lowercase hex string. + * + * @return std::string Hex-encoded SHA-256 of the packet’s layered bytes. + */ + std::string generate_sha256() { + // 1) Gather all layer-bytes into a continuous buffer + std::ostringstream raw_stream; + for (pcpp::Layer* layer = Packet.getFirstLayer(); layer; layer = layer->getNextLayer()) { + raw_stream.write( + reinterpret_cast(layer->getData()), + layer->getDataLen() + ); + } + std::string data = raw_stream.str(); + + // 2) Compute SHA-256 digest + unsigned char digest[SHA256_DIGEST_LENGTH]; + SHA256( + reinterpret_cast(data.data()), + data.size(), + digest + ); + + // 3) Convert digest to hex string + std::ostringstream hash_stream; + hash_stream << std::hex << std::setfill('0'); + for (int i = 0; i < SHA256_DIGEST_LENGTH; ++i) { + hash_stream << std::setw(2) << static_cast(digest[i]); + } + return hash_stream.str(); + } + + /** + * @brief Populates layer_map with all protocol layers present in this Packet. + * + * For each layer in Packet: + * - Get its ProtocolType via layer->getProtocol(). + * - Convert that enum to a human-readable string. + * - Insert {layerName, true} into layer_map. + * + * Called by the constructor if the user did not supply a prebuilt layer_map. + */ + void extract_layers() { + layer_map.clear(); + for (pcpp::Layer* layer = Packet.getFirstLayer(); layer; layer = layer->getNextLayer()) { + std::string layerName = getProtocolTypeAsString(layer->getProtocol()); + layer_map.insert({layerName, true}); + } + } + + /** + * @brief Convert a pcpp::ProtocolType enum to a std::string. + * + * Recognizes common protocols; defaults to "Unknown" otherwise. + * + * @param protocolType The ProtocolType enum from PcapPlusPlus. + * @return std::string Human-readable protocol name. + */ + std::string getProtocolTypeAsString(pcpp::ProtocolType protocolType) { + switch (protocolType) { + case pcpp::Ethernet: + return "Ethernet"; + case pcpp::IPv4: + return "IPv4"; + case pcpp::IPv6: + return "IPv6"; + case pcpp::TCP: + return "TCP"; + case pcpp::UDP: + return "UDP"; + case pcpp::HTTPRequest: + return "HTTPRequest"; + case pcpp::HTTPResponse: + return "HTTPResponse"; + case pcpp::DNS: + return "DNS"; + default: + return "Unknown"; + } + } + +public: + /** + * @brief Constructor: Wraps a unique_ptr and initializes metadata. + * + * @param rawPacketPointer A unique_ptr pointing to a dynamically allocated RawPacket. + * @param addr_map Optional mapping from original addresses → replacement addresses. + * @param lmap Optional precomputed map of layer presence. If empty, extract_layers() is called. + * + * Workflow: + * 1. Move rawPacketPointer into rawPtr (taking ownership). + * 2. Construct a pcpp::Packet from rawPtr.get(), enabling layer inspection. + * 3. If the user provided a non-empty lmap, copy it into layer_map; + * otherwise call extract_layers() to detect layers automatically. + * 4. Compute the SHA-256 hash over all layer bytes via generate_sha256(). 
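The byte concatenation in generate_sha256() above boils down to the standard PcapPlusPlus layer walk; the sketch below performs the same walk but prints each layer instead of hashing it (the "Packet.h" include name is assumed to follow the usual PcapPlusPlus header layout).

#include <iostream>
#include "Packet.h"   // PcapPlusPlus parsed-packet API

// Walk the layer chain of an already-parsed packet and report each layer's size.
void dumpLayers(pcpp::Packet& parsedPacket) {
    for (pcpp::Layer* layer = parsedPacket.getFirstLayer(); layer != nullptr; layer = layer->getNextLayer()) {
        std::cout << layer->toString() << " (" << layer->getDataLen() << " bytes)" << std::endl;
    }
}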
+ */ + FIPPacket(std::unique_ptr rawPacketPointer, + const std::unordered_map& addr_map = {}, + const std::unordered_map& lmap = {}) + : address_mapping(addr_map), + rawPtr(std::move(rawPacketPointer)) + { + // Build a pcpp::Packet wrapper around the raw data + Packet = pcpp::Packet(rawPtr.get()); + + // Populate layer_map: either use provided lmap or auto-detect + if (lmap.empty()) { + extract_layers(); + } else { + layer_map = lmap; + } + + // Compute the SHA-256 hash over all concatenated layer bytes + hash = generate_sha256(); + } + + /// Virtual destructor ensures proper cleanup in derived classes + virtual ~FIPPacket() = default; + + /** + * @brief Placeholder method for header preprocessing in derived classes. + * + * Derived classes (e.g., EtherPacket) can override to implement protocol-specific + * header manipulation (e.g., rewriting MAC addresses, stripping fields). + */ + virtual void header_preprocessing() { + // Base class does not modify headers by default. + } + + /** + * @brief Retrieve the SHA-256 hash string for this packet. + * @return const std::string& Reference to the precomputed hash string. + */ + const std::string& getHash() const { + return hash; + } + + /** + * @brief Retrieve a const reference to the layer presence map. + * @return const std::unordered_map& layer_map indicating present protocols. + */ + const std::unordered_map& getLayerMap() const { + return layer_map; + } + + /** + * @brief Retrieve a const reference to the address mapping used when rewriting addresses. + * @return const std::unordered_map& Mapping of original → new addresses. + */ + const std::unordered_map& getAdressMapping() const { + return address_mapping; + } + + /** + * @brief Access the underlying RawPacket pointer for additional, low-level operations. + * @return pcpp::RawPacket* Raw pointer to the owned RawPacket. + */ + pcpp::RawPacket* getRawPacket() { + return rawPtr.get(); + } + + /** + * @brief Const overload: Access the underlying RawPacket pointer without modification. + * @return const pcpp::RawPacket* Const raw pointer to the owned RawPacket. + */ + const pcpp::RawPacket* getRawPacket() const { + return rawPtr.get(); + } +}; + +/** + * @class UnknownPacket + * @brief A concrete subclass of FIPPacket for packets with no protocol-specific preprocessing. + * + * Often used as a default when the type of packet is not recognized or does not require special handling. + */ +class UnknownPacket : public FIPPacket { +public: + /** + * @brief Constructor: Passes parameters to base FIPPacket. + * + * @param rawPacketPointer Owned unique_ptr to RawPacket. + * @param addr_map Optional address rewriting map. + * @param lmap Optional precomputed layer map. + * + * Simply delegates to FIPPacket’s constructor. No additional extraction logic. + */ + UnknownPacket(std::unique_ptr rawPacketPointer, + const std::unordered_map& addr_map = {}, + const std::unordered_map& lmap = {}) + : FIPPacket(std::move(rawPacketPointer), addr_map, lmap) + {} + + /** + * @brief Override placeholder for header preprocessing. + * + * Calls base-class header_preprocessing (which does nothing), but can be extended in future. + */ + void header_preprocessing() override { + FIPPacket::header_preprocessing(); + } +}; + +/** + * @brief Generate a random MAC address string in uppercase hex format (e.g., "A1:B2:C3:D4:E5:F6"). + * + * Steps: + * 1. For each of 6 octets, generate a random number 0–255 (using rand()). + * 2. Format each octet as two uppercase hex digits, separated by colons. 
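generate_random_mac(), documented above and defined just below, relies on rand(), and the generateRandomIPv4()/generateRandomIPv6() helpers in ip.cpp additionally re-seed with std::time() on every call, so packets processed within the same second can receive identical "random" addresses. A hedged alternative using <random> with a single lazily seeded engine:

#include <iomanip>
#include <random>
#include <sstream>
#include <string>

// Alternative sketch, not the implementation below: one engine, seeded once,
// producing colon-separated uppercase hex octets.
static std::string randomMacAlternative() {
    static std::mt19937 rng{std::random_device{}()};
    std::uniform_int_distribution<int> octet(0, 255);

    std::ostringstream mac;
    mac << std::hex << std::uppercase << std::setfill('0');
    for (int i = 0; i < 6; ++i) {
        if (i > 0) mac << ':';
        mac << std::setw(2) << octet(rng);
    }
    return mac.str();
}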
+ * + * @return std::string A randomly generated 6-byte MAC address. + */ +std::string generate_random_mac() { + std::stringstream mac; + mac << std::hex << std::uppercase << std::setw(2) << std::setfill('0') << (rand() % 256); + for (int i = 0; i < 5; ++i) { + mac << ":" << std::setw(2) << std::setfill('0') << (rand() % 256); + } + return mac.str(); +} + +/** + * @class EtherPacket + * @brief A subclass of FIPPacket that specifically handles Ethernet-layer address rewriting. + * + * Responsibilities: + * - On construction, detect if the packet contains an Ethernet layer. + * - If so, replace source and destination MAC addresses according to address_mapping, + * or generate new random MACs if none exist in the map. + * - Update the address_mapping so future packets with the same original MAC map to the same new MAC. + */ +class EtherPacket : public FIPPacket { +public: + /** + * @brief Constructor: Initializes base class and invokes __filter if Ethernet is present. + * + * @param rawPacketPointer Owned unique_ptr to the RawPacket. + * @param addr_map Initial address mapping (original → new MAC). May be empty. + * @param lmap Initial layer presence map. If empty, base class will extract layers. + * + * Workflow: + * 1. Delegate to FIPPacket constructor, which parses layers and computes hash. + * 2. Check if "Ethernet" appears in layer_map; if so, call __filter() to rewrite MACs. + */ + EtherPacket(std::unique_ptr rawPacketPointer, + const std::unordered_map& addr_map = {}, + const std::unordered_map& lmap = {}) + : FIPPacket(std::move(rawPacketPointer), addr_map, lmap) + { + // If this packet has an Ethernet layer, perform address rewriting + if (layer_map.find("Ethernet") != layer_map.end()) { + __filter(); + } + } + + /** + * @brief Inspect and modify the Ethernet source/destination MAC addresses. + * + * Steps: + * 1. Retrieve the EthLayer pointer from Packet (pcpp::Packet::getLayerOfType()). + * 2. If no Ethernet layer is found, return immediately. + * 3. Read the original src and dst MAC addresses (as strings). + * 4. For each address, check if it already exists in address_mapping: + * - If yes, use the mapped value. + * - If no, generate a random MAC via generate_random_mac(), insert into address_mapping. + * 5. Set the new MAC addresses on the EthLayer (pcpp::MacAddress). + */ + void __filter() { + // Get the Ethernet layer from the parsed packet + pcpp::EthLayer* ethLayer = Packet.getLayerOfType(); + if (ethLayer == nullptr) { + // Packet does not actually contain an Ethernet layer + return; + } + + // Extract original MAC addresses as strings + std::string previous_src = ethLayer->getSourceMac().toString(); + std::string previous_dst = ethLayer->getDestMac().toString(); + + std::string new_src, new_dst; + + // Determine or generate a replacement for the source MAC + if (address_mapping.count(previous_src) > 0) { + new_src = address_mapping[previous_src]; + } else { + new_src = generate_random_mac(); + address_mapping[previous_src] = new_src; + } + + // Determine or generate a replacement for the destination MAC + if (address_mapping.count(previous_dst) > 0) { + new_dst = address_mapping[previous_dst]; + } else { + new_dst = generate_random_mac(); + address_mapping[previous_dst] = new_dst; + } + + // Apply the new addresses back into the EthLayer + ethLayer->setSourceMac(pcpp::MacAddress(new_src)); + ethLayer->setDestMac(pcpp::MacAddress(new_dst)); + } + + /** + * @brief Override header preprocessing for EtherPacket. 
+ * + * Currently, this simply invokes the base-class behavior (which is a no-op), + * but can be extended to add further Ethernet-specific modifications if needed. + */ + void header_preprocessing() override { + FIPPacket::header_preprocessing(); + // Future: add any additional preprocessing steps here (e.g., VLAN tag stripping). + } +}; \ No newline at end of file diff --git a/heiFIP/layers/ssh.cpp b/heiFIP/layers/ssh.cpp new file mode 100644 index 0000000..ffd83da --- /dev/null +++ b/heiFIP/layers/ssh.cpp @@ -0,0 +1,57 @@ +#pragma once + +#include "transport.cpp" + +/** + * @class SSHPacketProcessor + * @brief Handles SSH-specific packet processing by extending TransportPacket. + * + * Responsibilities: + * - Inherits all Ethernet, IP, and transport-layer rewriting and hashing logic. + * - Provides a hook for SSH-specific header preprocessing in the future. + * - Currently, it simply delegates to TransportPacket for all work. + */ +class SSHPacketProcessor : public TransportPacket { +public: + /** + * @brief Constructor: initialize SSH packet processor with given raw packet and mappings. + * + * @param rawPacketPointer unique_ptr to the raw pcpp::RawPacket containing packet bytes. + * @param addressMapping Mapping of original → rewritten MAC/IP addresses (populated by base classes). + * @param layerMap Map of protocol layers present (Ethernet, IP, TCP/UDP, SSH). + * + * Workflow: + * 1. Calls TransportPacket constructor, which in turn: + * - Rewrites Ethernet MACs (EtherPacket). + * - Rewrites IP addresses and computes IP-header hash (IPPacket). + * - Computes transport-layer hash and optionally strips payload (TransportPacket). + * 2. No SSH-specific logic in this constructor; header_preprocessing() can be overridden as needed. + */ + SSHPacketProcessor(std::unique_ptr rawPacketPointer, + std::unordered_map addressMapping = {}, + std::unordered_map layerMap = {}) + : TransportPacket(std::move(rawPacketPointer), addressMapping, layerMap) + {} + + /** + * @brief Override header preprocessing to allow SSH-specific modifications. + * + * Workflow: + * 1. Call TransportPacket::header_preprocessing() to apply all lower-layer logic: + * - Ethernet MAC rewriting + * - IP address rewriting and IP-header hashing + * - TCP/UDP hashing and optional payload stripping + * 2. (Currently a placeholder) Insert SSH-specific header modifications here. + * + * Why: + * - SSH packets may require rewriting or sanitizing certain payload bytes, ports, or flags. + * - By overriding this method, SSHPacketProcessor can insert or remove layers, + * update checksums, or anonymize SSH-specific fields before final serialization. + */ + void header_preprocessing() override { + // Perform all transport-layer and lower-layer preprocessing + TransportPacket::header_preprocessing(); + + // TODO: Add SSH-specific preprocessing (e.g., port-based filtering, payload sanitization) + } +}; \ No newline at end of file diff --git a/heiFIP/layers/transport.cpp b/heiFIP/layers/transport.cpp new file mode 100644 index 0000000..debb87d --- /dev/null +++ b/heiFIP/layers/transport.cpp @@ -0,0 +1,293 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include // For SHA-256 hashing + +#include "header.cpp" +#include "ip.cpp" +#include "packet.cpp" + +/** + * @class TransportPacket + * @brief Extends IPPacket to handle TCP/UDP-specific hashing, optional payload stripping, + * and substitution of standard TCP/UDP layers with custom transport-layer classes. 
+ * + * Responsibilities: + * - In the constructor, detect the presence of a TCP or UDP layer. + * - Compute a SHA-256 hash based on key transport-header fields (flags & options for TCP, + * or a simple identifier for UDP). + * - If TLS is present without a transport-layer handshake (e.g., no TCP/UDP) or if “Raw” + * payload exists without HTTP, strip downstream payload layers to sanitize the packet. + * - In header_preprocessing(), find and replace the original TCP/UDP layer with a custom + * transport-layer object (CustomTCPLayer or CustomUDPLayer), then recompute checksums. + * + * Inherits from IPPacket, so all IP- and Ethernet-level rewriting and hashing have already occurred. + */ +class TransportPacket : public IPPacket { +public: + /// Stores the SHA-256 hex string computed over transport-layer header fields + std::string hash; + + /** + * @brief Constructor: Compute transport-layer hash and optionally strip payload layers. + * + * @param rawPacketPointer unique_ptr to pcpp::RawPacket containing full packet bytes. + * @param addressMapping Initial mapping of original → replacement addresses (populated by IPPacket/EtherPacket). + * @param layerMap Map of protocol layers present (populated by FIPPacket base classes). + * + * Workflow: + * 1. Call IPPacket constructor, which: + * - Rewrites Ethernet MACs (via EtherPacket), + * - Detects layers and computes a packet-level SHA-256, + * - Rewrites IP addresses and computes an IP-header-specific hash, + * - Optionally strips IP payload if TLS/Raw conditions met. + * + * 2. If a TCP layer is present (layerMap["TCP"] is true): + * a. Retrieve the TcpLayer from `Packet`. + * b. Assemble a byte representing all TCP flags (SYN, ACK, FIN, RST, PSH, URG, ECE, CWR). + * c. Count the number of TCP options in the header. + * d. Build a string "flagsValue,optionsCount" and compute its SHA-256. + * e. Store the result in `hash`. + * f. If TLS exists without TCP/UDP, or Raw exists without HTTP, detach all downstream layers + * from the TCP layer and delete them, then recompute checksums (to sanitize the packet). + * + * 3. Else if a UDP layer is present (layerMap["UDP"] is true): + * a. Retrieve the UdpLayer. + * b. Build a simple hash input "UDP" and call md5Hash() (SHA-256) to compute a hash. + * c. If TLS exists without TCP/UDP, or Raw exists without HTTP, detach and delete all layers + * that follow the UDP layer, then recompute checksums. 
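Step 2.b above packs the TCP flag booleans into one byte using the bit positions of the real TCP flags field; a quick check with a SYN-ACK segment:

#include <cassert>
#include <cstdint>

// Worked example: SYN (0x02) plus ACK (0x10) yields a flags byte of 0x12.
void tcpFlagsExample() {
    bool synFlag = true, ackFlag = true, finFlag = false;
    uint8_t flags = 0;
    if (synFlag) flags |= 0x02;
    if (ackFlag) flags |= 0x10;
    if (finFlag) flags |= 0x01;
    assert(flags == 0x12);
}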
+ */ + TransportPacket(std::unique_ptr rawPacketPointer, + std::unordered_map addressMapping = {}, + std::unordered_map layerMap = {}) + : IPPacket(std::move(rawPacketPointer), addressMapping, layerMap) + { + // If packet has a TCP layer, compute a hash of flags+options and possibly strip payload + if (layerMap["TCP"]) { + pcpp::TcpLayer* tcpLayer = Packet.getLayerOfType(); + if (tcpLayer != nullptr) { + // Extract TCP header fields + pcpp::tcphdr* tcpHeader = tcpLayer->getTcpHeader(); + // Pack all boolean flags into a single byte + uint8_t flags = 0; + if (tcpHeader->synFlag) flags |= 0x02; + if (tcpHeader->ackFlag) flags |= 0x10; + if (tcpHeader->finFlag) flags |= 0x01; + if (tcpHeader->rstFlag) flags |= 0x04; + if (tcpHeader->pshFlag) flags |= 0x08; + if (tcpHeader->urgFlag) flags |= 0x20; + if (tcpHeader->eceFlag) flags |= 0x40; + if (tcpHeader->cwrFlag) flags |= 0x80; + + // Count TCP options present in the header + int optionsCount = tcpLayer->getTcpOptionCount(); + + // Build hash input string: "," + std::ostringstream oss; + oss << static_cast(flags) << "," << optionsCount; + std::string hashInput = oss.str(); + + // Compute SHA-256 over hashInput + unsigned char digest[SHA256_DIGEST_LENGTH]; + SHA256(reinterpret_cast(hashInput.c_str()), + hashInput.length(), + digest); + + // Convert digest bytes to hex string and store in `hash` + std::ostringstream hashStream; + hashStream << std::hex << std::setw(2) << std::setfill('0'); + for (int i = 0; i < SHA256_DIGEST_LENGTH; ++i) { + hashStream << static_cast(digest[i]); + } + hash = hashStream.str(); + + // If TLS present without transport or Raw present without HTTP, strip payload + if (layerMap["TLS"] || (layerMap["Raw"] && !layerMap["HTTP"])) { + // Detach all layers following the TCP layer to remove payload + pcpp::Layer* next = tcpLayer->getNextLayer(); + while (next) { + pcpp::Layer* toRemove = next; + next = next->getNextLayer(); // Advance before deletion + Packet.detachLayer(toRemove); + delete toRemove; + } + // Recompute checksums/lengths for remaining headers + Packet.computeCalculateFields(); + } + } + } + // Else if packet has a UDP layer, compute a simple hash and possibly strip payload + else if (layerMap["UDP"]) { + pcpp::UdpLayer* udpLayer = Packet.getLayerOfType(); + if (udpLayer != nullptr) { + // Use the literal "UDP" as hash input + std::string layerName = "UDP"; + hash = md5Hash(layerName); + + // If TLS present without transport or Raw present without HTTP, strip payload + if (layerMap["TLS"] || (layerMap["Raw"] && !layerMap["HTTP"])) { + // Detach all layers following the UDP layer to remove payload + pcpp::Layer* next = udpLayer->getNextLayer(); + while (next) { + pcpp::Layer* toRemove = next; + next = next->getNextLayer(); + Packet.detachLayer(toRemove); + delete toRemove; + } + // Recompute checksums/lengths for remaining headers + Packet.computeCalculateFields(); + } + } + } + } + + /** + * @brief Replace the existing TCP/UDP layer with a Custom transport layer, then recompute checksums. + * + * Workflow for TCP: + * 1. If a TCP layer exists, retrieve it via Packet.getLayerOfType(). + * 2. Call header_preprocessing_tcp(oldTcp) to build a CustomTCPLayer from the old header fields. + * 3. Insert the CustomTCPLayer into the packet right after oldTcp’s previous layer. + * 4. Detach and delete the old TcpLayer. + * 5. Recompute length/checksum fields (Packet.computeCalculateFields()). + * + * Workflow for UDP: + * 1. If a UdpLayer exists, retrieve it via Packet.getLayerOfType(). + * 2. 
Call header_preprocessing_udp(oldUdp) to build a CustomUDPLayer. + * 3. Insert the CustomUDPLayer right after oldUdp’s previous layer. + * 4. Detach and delete the old UdpLayer. + * 5. Recompute length/checksum fields. + * + * Finally, call IPPacket::header_preprocessing() to allow IP-layer substitutions from the parent class. + */ + void header_preprocessing() override { + // Replace TCP layer if present + if (layer_map["TCP"]) { + pcpp::TcpLayer* oldTcp = Packet.getLayerOfType(); + if (!oldTcp) { + // No TCP layer found; skip + return; + } + + // Build a CustomTCPLayer from the old TCP header fields + std::unique_ptr customLayer = header_preprocessing_tcp(oldTcp); + + // Insert custom layer in place of old one + pcpp::Layer* prev = oldTcp->getPrevLayer(); + Packet.insertLayer(prev, customLayer.release()); + + // Detach and delete the original TCP layer + Packet.detachLayer(oldTcp); + delete oldTcp; + + // Recompute upper-layer lengths and checksums + Packet.computeCalculateFields(); + } + + // Replace UDP layer if present + if (layer_map["UDP"]) { + pcpp::UdpLayer* oldUdp = Packet.getLayerOfType(); + if (!oldUdp) { + // No UDP layer found; skip + return; + } + + // Build a CustomUDPLayer (no fields needed from original UDP header) + std::unique_ptr customLayer = header_preprocessing_udp(oldUdp); + + // Insert custom layer in place of old one + pcpp::Layer* prev = oldUdp->getPrevLayer(); + Packet.insertLayer(prev, customLayer.release()); + + // Detach and delete the original UDP layer + Packet.detachLayer(oldUdp); + delete oldUdp; + + // Recompute upper-layer lengths and checksums + Packet.computeCalculateFields(); + } + + // Delegate to IPPacket for any IPv4/IPv6 header substitutions + IPPacket::header_preprocessing(); + } + + /** + * @brief Build a CustomTCPLayer from an existing TcpLayer’s header fields. + * + * @param tcpLayer Pointer to the original pcpp::TcpLayer being replaced. + * @return std::unique_ptr Custom layer capturing TCP flags and options. + * + * Steps: + * 1. Read the tcphdr struct from tcpLayer to extract individual flags (FIN, SYN, RST, PSH, ACK, URG, ECE, CWR). + * 2. Pack these booleans into a single 8-bit flags value. + * 3. Determine the length of TCP options by subtracting the fixed header size from header_len. + * 4. Copy those options bytes into a std::vector. + * 5. Construct a CustomTCPLayer(flags, options) with those values. + * + * Why: + * - CustomTCPLayer may implement specialized behavior (e.g., anonymization, logging) based on flags/options. + */ + std::unique_ptr header_preprocessing_tcp(pcpp::TcpLayer* tcpLayer) { + pcpp::tcphdr* hdr = tcpLayer->getTcpHeader(); + // Pack flags into a single byte + uint16_t flags = 0; + if (hdr->finFlag) flags |= 0x01; + if (hdr->synFlag) flags |= 0x02; + if (hdr->rstFlag) flags |= 0x04; + if (hdr->pshFlag) flags |= 0x08; + if (hdr->ackFlag) flags |= 0x10; + if (hdr->urgFlag) flags |= 0x20; + if (hdr->eceFlag) flags |= 0x40; + if (hdr->cwrFlag) flags |= 0x80; + + // Calculate length of TCP options (header length minus fixed header size) + size_t optLen = tcpLayer->getHeaderLen() - sizeof(*hdr); + const uint8_t* optPtr = reinterpret_cast(hdr) + sizeof(*hdr); + std::vector options(optPtr, optPtr + optLen); + + return std::make_unique(flags, options); + } + + /** + * @brief Build a CustomUDPLayer for a given UdpLayer. + * + * @param udpLayer Pointer to the original pcpp::UdpLayer being replaced. + * @return std::unique_ptr Custom layer; no additional fields needed. 
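The options handling in header_preprocessing_tcp() above is plain header-length arithmetic: everything beyond the fixed 20-byte TCP header is option bytes. A small numeric check (the 32-byte header length is a hypothetical value):

#include <cassert>
#include <cstddef>

// Worked example: a TCP header reporting 32 bytes carries 32 - 20 = 12 bytes of options.
void tcpOptionLengthExample() {
    const std::size_t fixedTcpHeader = 20;   // size of the mandatory TCP header
    std::size_t headerLen = 32;              // e.g., value returned by getHeaderLen()
    std::size_t optLen = headerLen - fixedTcpHeader;
    assert(optLen == 12);
}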
+ * + * Why: + * - CustomUDPLayer can encapsulate any UDP-specific processing in one place. Currently stateless. + */ + std::unique_ptr header_preprocessing_udp(pcpp::UdpLayer* /*udpLayer*/) { + return std::make_unique(); + } + +private: + /** + * @brief Compute a SHA-256 hash over a simple input string and return the hex string. + * + * @param input A small string (e.g., "UDP") to hash. + * @return std::string Lowercase hex string of SHA-256 digest. + * + * Why: + * - Provides a consistent digest even when transport-layer fields are minimal, + * enabling indexing or deduplication based on protocol type. + */ + std::string md5Hash(const std::string& input) { + unsigned char digest[SHA256_DIGEST_LENGTH]; + SHA256(reinterpret_cast(input.c_str()), input.length(), digest); + + std::ostringstream oss; + oss << std::hex << std::setw(2) << std::setfill('0'); + for (int i = 0; i < SHA256_DIGEST_LENGTH; ++i) { + oss << static_cast(digest[i]); + } + return oss.str(); + } +}; \ No newline at end of file diff --git a/heiFIP/main.cpp b/heiFIP/main.cpp new file mode 100644 index 0000000..32ec4d1 --- /dev/null +++ b/heiFIP/main.cpp @@ -0,0 +1,78 @@ +#include "runner.cpp" +#include +#include + +/// @brief Lists all `.pcap` file paths in a given directory. +/// @param dirString The path to the directory to scan. +/// @return A vector containing full paths to `.pcap` files in the directory. +std::vector listPcapFilePathsInDir(const std::string& dirString) { + std::vector pcapFilePaths; + std::filesystem::path dirPath{dirString}; + + // Return empty if the path does not exist or is not a directory + if (!std::filesystem::exists(dirPath) || !std::filesystem::is_directory(dirPath)) { + return pcapFilePaths; + } + + // Iterate through all files in the directory + for (auto const& entry : std::filesystem::directory_iterator(dirPath)) { + if (!entry.is_regular_file()) + continue; // Skip directories or special files + + std::filesystem::path filepath = entry.path(); + if (filepath.extension() == ".pcap") { + pcapFilePaths.push_back(filepath.string()); // Store full file path + } + } + + return pcapFilePaths; +} + +/// @brief Extracts the filename without extension from a full file path. +/// @param fullPath The complete file path. +/// @return The filename without its extension. +std::string filenameWithoutExtension(const std::string& fullPath) { + std::filesystem::path p{fullPath}; + return p.filename().stem().string(); +} + +/// @brief Entry point of the application. +/// Demonstrates loading `.pcap` files and generating images using a Runner object. 
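A one-line illustration of filenameWithoutExtension() above ("/tmp/flows/session1.pcap" is a made-up path):

#include <cassert>

// Directory and ".pcap" suffix are stripped, leaving only the stem.
void filenameExample() {
    assert(filenameWithoutExtension("/tmp/flows/session1.pcap") == "session1");
}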
+int main() { + // Paths to input `.pcap` directory and output image directory + std::string output_dir = "/Users/henrirebitzky/Documents/BachelorDerInformatikAnDerUniversitätHeidelberg/IFPGit/heiFIP/build/"; + std::string input_dir = "/Users/henrirebitzky/Documents/BachelorDerInformatikAnDerUniversitätHeidelberg/IFPGit/tests/pcaps/http"; + + // Retrieve all `.pcap` files from the input directory + std::vector files = listPcapFilePathsInDir(input_dir); + + Runner runner(4); // Runner with a thread pool of size 4 + + // Predefined argument sets for various image generation strategies + FlowImageArgs args{16, true, 0}; + FlowImageTiledFixedArgs args2{16, 0, 3}; + FlowImageTiledAutoArgs args3{16, 0, true}; + MarkovTransitionMatrixFlowArgs args4{3}; + MarkovTransitionMatrixPacketArgs args5{}; + PacketImageArgs args6{16, 0, true}; + + // Process each `.pcap` file and generate an image + for (const std::string& filepath : files) { + runner.create_image( + filenameWithoutExtension(filepath), // Image name based on filename + filepath, // Path to `.pcap` input file + output_dir, // Where to save output image + args3, // Argument set (select one appropriate for image type) + PacketProcessorType::HEADER, // Use HEADER for packet processing + ImageType::FlowImageTiledAuto, // Type of image to generate + 1, // Minimum image dimension + 2000, // Maximum image dimension + 1, // Minimum packets per flow + 2000, // Maximum packets per flow + false // Whether to remove duplicate packets + ); + } + + // return 0 indicates successful execution + return 0; +} \ No newline at end of file diff --git a/heiFIP/plugins/header.cpp b/heiFIP/plugins/header.cpp new file mode 100644 index 0000000..afde2e6 --- /dev/null +++ b/heiFIP/plugins/header.cpp @@ -0,0 +1,1208 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * ============================================================================= + * Custom IP Header Structures and Layers + * ============================================================================= + */ + +#pragma pack(push,1) +/** + * @struct CustomIPHeader + * @brief Defines a minimal 8-byte “IP-like” header layout for internal use. + * + * Fields (1 byte each): + * - version_and_flags: High 4 bits = version, low 4 bits = flags. + * - tos : Type of Service. + * - ttl : Time to Live. + * - proto : Upper-layer protocol (e.g., TCP=6, UDP=17). + * - reserved[4] : Reserved for alignment/possible future use. + */ +struct CustomIPHeader { + uint8_t version_and_flags; // high 4 bits: version, low 4 bits: flags + uint8_t tos; // Type of Service + uint8_t ttl; // Time to Live + uint8_t proto; // Protocol number + uint8_t reserved[4]; // Reserved/padding +}; +#pragma pack(pop) + +#pragma pack(push, 1) +/** + * @struct custom_ip_header + * @brief 4-byte minimal IPv4-like header for the CustomIPLayer. + * + * Fields: + * - versionAndFlags: 1 byte, high nibble = IP version, low nibble = custom flags. + * - typeOfService : 1 byte, TOS field. + * - timeToLive : 1 byte, TTL value. + * - protocol : 1 byte, encodes the upper-layer protocol. + */ +struct custom_ip_header { + uint8_t versionAndFlags; // 4 bits version, 4 bits flags + uint8_t typeOfService; // TOS + uint8_t timeToLive; // TTL + uint8_t protocol; // Protocol number +}; +#pragma pack(pop) + +/** + * @class CustomIPLayer + * @brief Implements a minimal custom IPv4-like header as a pcpp::Layer. 
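Editorial aside (not part of this changeset): main() above hard-codes developer-specific input and output paths. A hedged sketch of how the same pipeline could take both directories from the command line instead; it reuses listPcapFilePathsInDir, filenameWithoutExtension, Runner, FlowImageTiledAutoArgs, PacketProcessorType and ImageType exactly as they appear in this changeset, and assumes the same includes as main.cpp plus <iostream>.

#include <iostream>
#include <string>
// assumes the surrounding main.cpp includes ("runner.cpp", <filesystem>, <vector>)

int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cerr << "usage: " << argv[0] << " <input_pcap_dir> <output_image_dir>\n";
        return 1;
    }
    std::string input_dir  = argv[1];
    std::string output_dir = argv[2];

    Runner runner(4);                          // thread pool of size 4, as above
    FlowImageTiledAutoArgs args{16, 0, true};  // same settings as args3 above

    for (const std::string& filepath : listPcapFilePathsInDir(input_dir)) {
        runner.create_image(filenameWithoutExtension(filepath), filepath, output_dir,
                            args, PacketProcessorType::HEADER,
                            ImageType::FlowImageTiledAuto,
                            1, 2000, 1, 2000, false);
    }
    return 0;
}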
+ * + * Responsibilities: + * - Allocate and store a 4-byte custom_ip_header in m_Data. + * - Expose getters/setters for version, flags, TOS, TTL, and protocol. + * - Provide PcapPlusPlus overrides so it can be inserted into a pcpp::Packet. + */ +class CustomIPLayer : public pcpp::Layer { +public: + /** + * @brief Constructor: build a new custom IPv4 header from individual fields. + * + * @param version 4-bit IP version (e.g., 4 for IPv4). + * @param flags 4-bit custom flags. + * @param tos 8-bit Type of Service. + * @param ttl 8-bit Time to Live. + * @param proto 8-bit upper-layer protocol number. + * + * Workflow: + * 1. Allocate m_Data of size sizeof(custom_ip_header). + * 2. Set m_Protocol = UnknownProtocol (no standard PcapPlusPlus enum). + * 3. Fill the custom_ip_header fields in network (native) byte order: + * - versionAndFlags = (version << 4) | (flags & 0x0F). + * - typeOfService = tos. + * - timeToLive = ttl. + * - protocol = proto. + * 4. Call computeCalculateFields() to recalc any checksums if needed (no-op here). + */ + CustomIPLayer(uint8_t version, + uint8_t flags, + uint8_t tos, + uint8_t ttl, + uint8_t proto) + { + // Allocate header buffer + m_DataLen = sizeof(custom_ip_header); + m_Data = new uint8_t[m_DataLen]; + m_Protocol = pcpp::UnknownProtocol; + + // Cast m_Data to our struct and populate fields + auto* hdr = reinterpret_cast(m_Data); + hdr->versionAndFlags = static_cast((version << 4) | (flags & 0x0F)); + hdr->typeOfService = tos; + hdr->timeToLive = ttl; + hdr->protocol = proto; + + computeCalculateFields(); + } + + /** + * @brief Copy constructor: duplicate another CustomIPLayer, copying m_Data. + */ + CustomIPLayer(const CustomIPLayer& other) + : Layer(other) + { + m_DataLen = other.m_DataLen; + m_Data = new uint8_t[m_DataLen]; + memcpy(m_Data, other.m_Data, m_DataLen); + } + + virtual ~CustomIPLayer() { + // Base Layer destructor will free m_Data if it is owned + } + + /** + * @brief Return the size of our header (4 bytes). + */ + virtual size_t getHeaderLen() const override { + return sizeof(custom_ip_header); + } + + /** + * @brief Recompute any dynamic fields. No dynamic fields here, so no action. + * + * Called by PcapPlusPlus whenever the packet is re-serialized or length fields need recalculation. + */ + virtual void computeCalculateFields() override { + // No additional fields to recalculate (version/flags are static once set). + } + + /** + * @brief Called by PcapPlusPlus when parsing subsequent layers. + * We do not parse any next layer from our custom header, so do nothing. + */ + void parseNextLayer() override {} + + /** + * @brief Indicate that this layer sits at the Network layer in the OSI stack. + */ + pcpp::OsiModelLayer getOsiModelLayer() const override { + return pcpp::OsiModelNetworkLayer; + } + + /** + * @brief Return a human-readable layer name for debugging. + */ + virtual std::string toString() const override { + return "CustomIPLayer"; + } + + // ------------------------------------------------------------------------ + // Field Accessors + // ------------------------------------------------------------------------ + + /** + * @brief Extract the high-nibble (version) from versionAndFlags. + */ + uint8_t getVersion() const { + auto* hdr = reinterpret_cast(m_Data); + return hdr->versionAndFlags >> 4; + } + + /** + * @brief Extract the low-nibble (flags) from versionAndFlags. 
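Editorial aside (not part of this changeset): the versionAndFlags byte used by CustomIPLayer packs two 4-bit values into one octet. A tiny standalone sketch of the pack/unpack round trip, using the same shifts and masks as the constructor and getVersion()/getFlags(); the sample values are assumptions.

#include <cassert>
#include <cstdint>

int main() {
    uint8_t version = 4;   // IPv4
    uint8_t flags   = 0x5; // arbitrary 4-bit custom flags

    // Pack: high nibble = version, low nibble = flags (as in the CustomIPLayer constructor)
    uint8_t versionAndFlags = static_cast<uint8_t>((version << 4) | (flags & 0x0F));

    // Unpack: same shifts/masks as getVersion()/getFlags()
    assert((versionAndFlags >> 4) == version);
    assert((versionAndFlags & 0x0F) == flags);
    return 0;
}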
+ */ + uint8_t getFlags() const { + auto* hdr = reinterpret_cast(m_Data); + return hdr->versionAndFlags & 0x0F; + } + + /** + * @brief Return the Type of Service field. + */ + uint8_t getTos() const { + auto* hdr = reinterpret_cast(m_Data); + return hdr->typeOfService; + } + + /** + * @brief Return the Time To Live field. + */ + uint8_t getTtl() const { + auto* hdr = reinterpret_cast(m_Data); + return hdr->timeToLive; + } + + /** + * @brief Return the Protocol (upper-layer) field. + */ + uint8_t getProtocol() const { + auto* hdr = reinterpret_cast(m_Data); + return hdr->protocol; + } + + // ------------------------------------------------------------------------ + // Field Setters (optional: if you want to change header after creation) + // ------------------------------------------------------------------------ + + /** + * @brief Update the version (high 4 bits) in versionAndFlags. + */ + void setVersion(uint8_t v) { + auto* hdr = reinterpret_cast(m_Data); + hdr->versionAndFlags = static_cast((v << 4) | (hdr->versionAndFlags & 0x0F)); + } + + /** + * @brief Update the flags (low 4 bits) in versionAndFlags. + */ + void setFlags(uint8_t f) { + auto* hdr = reinterpret_cast(m_Data); + hdr->versionAndFlags = static_cast((hdr->versionAndFlags & 0xF0) | (f & 0x0F)); + } + + // Similar setters can be added for TOS, TTL, and protocol if desired +}; + + +#pragma pack(push, 1) +/** + * @struct custom_ipv6_header + * @brief 4-byte minimal IPv6-like header layout for CustomIPv6Layer. + * + * Fields: + * - version : 8-bit version (high nibble valid, low nibble zero). + * - trafficClass : 8-bit Traffic Class. + * - nextHeader : 8-bit next header field (upper-layer protocol). + * - hopLimit : 8-bit Hop Limit. + */ +struct custom_ipv6_header { + uint8_t version; ///< IPv6 version (e.g., 6) in high nibble + uint8_t trafficClass; ///< Traffic Class field + uint8_t nextHeader; ///< Next Layer protocol (e.g., TCP=6, UDP=17) + uint8_t hopLimit; ///< Hop Limit field +}; +#pragma pack(pop) + +/** + * @class CustomIPv6Layer + * @brief Implements a minimal custom IPv6-like header as a pcpp::Layer. + * + * Responsibilities: + * - Allocate and store a 4-byte custom_ipv6_header in m_Data. + * - Expose getters/setters for version, trafficClass, nextHeader, and hopLimit. + * - Provide necessary PcapPlusPlus overrides for header length, parsing, and serialization. + */ +class CustomIPv6Layer : public pcpp::Layer { +public: + /** + * @brief Constructor: build a new custom IPv6 header from individual fields. + * + * @param version 8-bit version (e.g., 6 for IPv6; lower nibble unused). + * @param tc 8-bit Traffic Class. + * @param nh 8-bit Next Header (upper-layer protocol). + * @param hlim 8-bit Hop Limit. + * + * Workflow: + * 1. Allocate m_Data of size sizeof(custom_ipv6_header). + * 2. Set m_Protocol = UnknownProtocol (no standard PcapPlusPlus enum). + * 3. Fill the custom_ipv6_header fields directly: + * - version = version. + * - trafficClass = tc. + * - nextHeader = nh. + * - hopLimit = hlim. + * 4. Call computeCalculateFields() (no dynamic fields here). + */ + CustomIPv6Layer(uint8_t version, + uint8_t tc, + uint8_t nh, + uint8_t hlim) + { + m_DataLen = sizeof(custom_ipv6_header); + m_Data = new uint8_t[m_DataLen]; + m_Protocol = pcpp::UnknownProtocol; + + auto* hdr = reinterpret_cast(m_Data); + hdr->version = version; + hdr->trafficClass = tc; + hdr->nextHeader = nh; + hdr->hopLimit = hlim; + + computeCalculateFields(); + } + + /** + * @brief Copy constructor: duplicate another CustomIPv6Layer, copying m_Data. 
+ */ + CustomIPv6Layer(const CustomIPv6Layer& other) + : Layer(other) + { + m_DataLen = other.m_DataLen; + m_Data = new uint8_t[m_DataLen]; + memcpy(m_Data, other.m_Data, m_DataLen); + } + + virtual ~CustomIPv6Layer() = default; + + /** + * @brief Return the size of our header (4 bytes). + */ + virtual size_t getHeaderLen() const override { + return sizeof(custom_ipv6_header); + } + + /** + * @brief Recompute dynamic fields. No dynamic fields here, so no action. + */ + virtual void computeCalculateFields() override { + // Nothing to recalc for our simple header + } + + /** + * @brief Called by PcapPlusPlus when parsing subsequent layers. We stop here. + */ + void parseNextLayer() override {} + + /** + * @brief Return a human-readable layer name. + */ + virtual std::string toString() const override { + return "CustomIPv6Layer"; + } + + /** + * @brief Indicate this is a Network layer (IPv6). + */ + pcpp::OsiModelLayer getOsiModelLayer() const override { + return pcpp::OsiModelNetworkLayer; + } + + // ------------------------------------------------------------------------ + // Field Accessors + // ------------------------------------------------------------------------ + + uint8_t getVersion() const { + auto* hdr = reinterpret_cast(m_Data); + return hdr->version; + } + + uint8_t getTrafficClass() const { + auto* hdr = reinterpret_cast(m_Data); + return hdr->trafficClass; + } + + uint8_t getNextHeader() const { + auto* hdr = reinterpret_cast(m_Data); + return hdr->nextHeader; + } + + uint8_t getHopLimit() const { + auto* hdr = reinterpret_cast(m_Data); + return hdr->hopLimit; + } + + // ------------------------------------------------------------------------ + // Field Setters + // ------------------------------------------------------------------------ + + void setVersion(uint8_t v) { + reinterpret_cast(m_Data)->version = v; + } + + void setTrafficClass(uint8_t tc) { + reinterpret_cast(m_Data)->trafficClass = tc; + } + + void setNextHeader(uint8_t nh) { + reinterpret_cast(m_Data)->nextHeader = nh; + } + + void setHopLimit(uint8_t hlim) { + reinterpret_cast(m_Data)->hopLimit = hlim; + } +}; + + +/* + * ============================================================================= + * Custom TCP Layer + * ============================================================================= + */ + +#pragma pack(push,1) +/** + * @struct CustomTCPHeader + * @brief Minimal representation of a TCP header storing only flags in network byte order. + * + * Fields: + * - flags: 16-bit TCP flags, already converted to network byte order by the constructor. + * - Raw TCP options (variable length) follow this 2-byte field in the payload. + */ +struct CustomTCPHeader { + uint16_t flags; // TCP flags (in network byte order) + // options follow immediately, but not part of this struct layout +}; +#pragma pack(pop) + +/** + * @class CustomTCPLayer + * @brief Implements a custom TCP header that contains only flags and raw options, as a pcpp::Layer. + * + * Responsibilities: + * - Allocate a buffer containing a 2-byte flags field (network byte order) followed by raw options. + * - Provide parseNextLayer() to create a PayloadLayer for any bytes beyond the header. + * - Expose getCustomHeader() to access the CustomTCPHeader in the buffer. + * - Override required Layer methods (getHeaderLen(), computeCalculateFields(), toString(), getOsiModelLayer()). + */ +class CustomTCPLayer : public pcpp::Layer { +public: + /** + * @brief Constructor: build a custom TCP header with flags and options. 
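Editorial aside (not part of this changeset): the buffer CustomTCPLayer maintains is simply a 2-byte flags field in network byte order followed by the raw option bytes, as described above. A short standalone sketch of that layout; the SYN|ACK value and the MSS option bytes are sample assumptions.

#include <arpa/inet.h>  // htons
#include <cstdint>
#include <cstring>
#include <vector>

int main() {
    uint16_t flags = 0x0012;                                  // SYN|ACK in the packing used above
    std::vector<uint8_t> options = {0x02, 0x04, 0x05, 0xb4};  // MSS option: kind=2, len=4, value=1460

    // Buffer layout: flags (network order) followed by the raw options
    std::vector<uint8_t> buffer(sizeof(uint16_t));
    uint16_t flagsNet = htons(flags);
    std::memcpy(buffer.data(), &flagsNet, sizeof(flagsNet));
    buffer.insert(buffer.end(), options.begin(), options.end());

    // Header length = 2 + options.size() = 6 bytes here
    return buffer.size() == 6 ? 0 : 1;
}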
+ * + * @param flags 16-bit TCP flags in host byte order (will be converted to network order). + * @param options Vector of raw TCP options bytes (variable length). + * + * Workflow: + * 1. Convert flags to network byte order (htons). + * 2. Resize internal `_buffer` to hold 2 bytes (flags) + options.size(). + * 3. Copy `flagsNet` into `_buffer[0..1]`, then append `options`. + * 4. Set m_Data = pointer to `_buffer` data, and m_DataLen = `_buffer.size()`. + * 5. Call computeCalculateFields() (no dynamic fields here). + */ + CustomTCPLayer(uint16_t flags, const std::vector& options) + : Layer(nullptr, 0, nullptr, 0) + { + // Convert flags to network byte order + uint16_t flagsNet = htons(flags); + + // Build buffer: flags(2 bytes) + options + _buffer.resize(sizeof(CustomTCPHeader)); + memcpy(_buffer.data(), &flagsNet, sizeof(flagsNet)); + _buffer.insert(_buffer.end(), options.begin(), options.end()); + + m_Data = _buffer.data(); + m_DataLen = _buffer.size(); + computeCalculateFields(); + } + + /** + * @brief Access the custom TCP header (first 2 bytes) from the buffer. + */ + CustomTCPHeader* getCustomHeader() const { + return reinterpret_cast(const_cast(m_Data)); + } + + /** + * @brief parseNextLayer: any bytes beyond the header become a PayloadLayer. + */ + void parseNextLayer() override { + size_t headerLen = getHeaderLen(); + if (m_DataLen > headerLen) { + // The remaining bytes form a payload + m_NextLayer = new pcpp::PayloadLayer( + m_Data + headerLen, + m_DataLen - headerLen, + this, + m_Packet + ); + } + } + + /** + * @brief Recompute dynamic fields. No dynamic fields, so no action. + */ + void computeCalculateFields() override { + // No checksums or length fields to recalc here + } + + /** + * @brief Return a human-readable string for this layer. + */ + std::string toString() const override { + return "CustomTCPLayer(len=" + std::to_string(getHeaderLen()) + ")"; + } + + /** + * @brief Indicate this layer is at the Transport layer in OSI model. + */ + pcpp::OsiModelLayer getOsiModelLayer() const override { + return pcpp::OsiModelTransportLayer; + } + + /** + * @brief Return the header length (flags + options). + */ + size_t getHeaderLen() const override { + return _buffer.size(); + } + + /** + * @brief Return a short name for the layer (TCP). + */ + std::string getName() const { + return "TCP"; + } + +private: + std::vector _buffer; ///< Internal buffer storing flags + options +}; + +/* + * ============================================================================= + * Custom UDP Layer + * ============================================================================= + */ + +/** + * @class CustomUDPLayer + * @brief Implements a “no-op” UDP layer (zero-length), primarily for consistency. + * + * Responsibilities: + * - Provide getHeaderLen() = 0 (no UDP header). + * - parseNextLayer() will consume any remaining bytes as PayloadLayer. + * - Override computeCalculateFields(), toString(), getOsiModelLayer(), getName(). + */ +class CustomUDPLayer : public pcpp::Layer { +public: + /** + * @brief Constructor: no header data is allocated (m_Data = nullptr, m_DataLen = 0). + */ + CustomUDPLayer() + : Layer(nullptr, 0, nullptr, 0) + { + m_Data = nullptr; + m_DataLen = 0; + computeCalculateFields(); + } + + /** + * @brief parseNextLayer: if there is any data beyond “header” (none), treat as payload. 
+ */ + void parseNextLayer() override { + if (m_DataLen > getHeaderLen()) { + m_NextLayer = new pcpp::PayloadLayer( + m_Data + getHeaderLen(), + m_DataLen - getHeaderLen(), + this, + m_Packet + ); + } + } + + /** + * @brief No dynamic fields, so no action. + */ + void computeCalculateFields() override {} + + /** + * @brief Return human-readable name for debugging. + */ + std::string toString() const override { + return "CustomUDPLayer(len=0)"; + } + + /** + * @brief Indicate this layer is at the Transport layer (UDP). + */ + pcpp::OsiModelLayer getOsiModelLayer() const override { + return pcpp::OsiModelTransportLayer; + } + + /** + * @brief Return header length (zero bytes). + */ + size_t getHeaderLen() const override { + return 0; + } + + /** + * @brief Return short layer name. + */ + std::string getName() const { + return "UDP"; + } +}; + +/* + * ============================================================================= + * Custom HTTP Layer + * ============================================================================= + */ + +/** + * @class CustomHTTP + * @brief Base class for custom HTTP layers (requests and responses). + * + * Responsibilities: + * - Maintain a vector of HTTP header field name/value pairs. + * - On computeCalculateFields(), serialize those fields into m_Data as ASCII text: + * • For “Method” or “Path” or “Status_Code”, output “value ”. + * • For other fields, output “Name: Value\r\n”. + * - Set m_Protocol = UnknownProtocol, since this is an application-layer payload. + * - Provide getHeaderLen(), parseNextLayer() (no next layer), toString(), getOsiModelLayer(). + */ +class CustomHTTP : public pcpp::Layer { +public: + /** + * @struct Field + * @brief Represents one HTTP header field with name and value. + */ + struct Field { + std::string name; ///< Field name (e.g., "User-Agent") + std::string value; ///< Field value (e.g., "Mozilla/5.0") + }; + + std::vector fields; ///< List of all fields in this HTTP layer + + /** + * @brief Constructor: initialize with no fields, m_DataLen = 0. + */ + CustomHTTP() + { + m_Protocol = pcpp::UnknownProtocol; + m_DataLen = 0; + m_Data = nullptr; + computeCalculateFields(); + } + + /** + * @brief Add an HTTP header field (name:value) to this layer. + * + * @param fieldName Name of the header (e.g., "Content-Type"). + * @param fieldValue Value of the header (e.g., "text/html"). + */ + void addField(const std::string& fieldName, const std::string& fieldValue) + { + fields.push_back({fieldName, fieldValue}); + } + + /** + * @brief Serialize all fields into m_Data buffer as ASCII text. + * + * Workflow: + * 1. Create a std::ostringstream. + * 2. For each field in `fields`: + * - If field.value is empty, skip it. + * - If field.name is "Method", "Path", or "Status_Code", append "value " (space, not CRLF). + * - Otherwise, append "Name: Value\r\n". + * 3. Convert the stream to a string `serialized`. + * 4. Allocate m_Data of length serialized.size() and copy serialized.data() to m_Data. 
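Editorial aside (not part of this changeset): the serialization rules above are easiest to see on a concrete field set. The sketch below mirrors the same rules outside the class; serializeHttpFields and the sample header values are assumptions introduced for illustration.

#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

// Mirrors the CustomHTTP rules: "Method"/"Path"/"Status_Code" are written as
// "value ", every other non-empty field as "Name: Value\r\n".
std::string serializeHttpFields(const std::vector<std::pair<std::string, std::string>>& fields) {
    std::ostringstream stream;
    for (const auto& f : fields) {
        if (f.second.empty())
            continue;
        if (f.first == "Method" || f.first == "Path" || f.first == "Status_Code")
            stream << f.second << ' ';
        else
            stream << f.first << ": " << f.second << "\r\n";
    }
    return stream.str();
}

int main() {
    // Produces: "GET / User_Agent: curl/8.5.0\r\n" (the empty Cookie field is skipped)
    std::cout << serializeHttpFields({{"Method", "GET"},
                                      {"Path", "/"},
                                      {"User_Agent", "curl/8.5.0"},
                                      {"Cookie", ""}}) << std::endl;
    return 0;
}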
+ */ + void computeCalculateFields() override + { + std::ostringstream stream; + for (const auto& field : fields) { + if (field.value.empty()) + continue; + + if (field.name == "Method" || field.name == "Path" || field.name == "Status_Code") { + stream << field.value << ' '; + } else { + stream << field.name << ": " << field.value << "\r\n"; + } + } + std::string serialized = stream.str(); + + m_DataLen = serialized.size(); + m_Data = new uint8_t[m_DataLen]; + std::memcpy(m_Data, serialized.data(), m_DataLen); + } + + /** + * @brief Return the header length (size of m_Data). + */ + size_t getHeaderLen() const override { + return m_DataLen; + } + + /** + * @brief No subsequent layer is parsed (application layer terminates). + */ + void parseNextLayer() override { + m_NextLayer = nullptr; + } + + /** + * @brief Human-readable name for this layer. + */ + std::string toString() const override { + return "CustomHTTP Layer"; + } + + /** + * @brief Indicate that this layer is at the Application layer (HTTP). + */ + pcpp::OsiModelLayer getOsiModelLayer() const override { + return pcpp::OsiModelApplicationLayer; + } +}; + +/** + * @class CustomHTTPRequest + * @brief Specialization of CustomHTTP for HTTP request messages. + * + * Responsibilities: + * - On construction, pre-populate common HTTP request fields with defaults: + * • Method = "GET" + * • Path = "/" + * • User_Agent, Content_Type, Connection, Accept, Accept_Charset, Accept_Encoding, Cookie, TE + * all begin as empty. + * - After construction, call computeCalculateFields() to build m_Data from defaults. + */ +class CustomHTTPRequest : public CustomHTTP { +public: + /** + * @brief Constructor: initialize standard request fields with default values. + */ + CustomHTTPRequest() + { + // Set default request-line fields + addField("Method", "GET"); + addField("Path", "/"); + // Add common header fields (initially empty) + addField("User_Agent", ""); + addField("Content_Type", ""); + addField("Connection", ""); + addField("Accept", ""); + addField("Accept_Charset", ""); + addField("Accept_Encoding", ""); + addField("Cookie", ""); + addField("TE", ""); + computeCalculateFields(); + } + + /** + * @brief Human-readable name override. + */ + virtual std::string toString() const override { + return "HTTP Request Layer"; + } + + /** + * @brief Return the layer name for printing or debugging. + */ + std::string getName() const { + return "HTTP Request"; + } +}; + +/** + * @class CustomHTTPResponse + * @brief Specialization of CustomHTTP for HTTP response messages. + * + * Responsibilities: + * - On construction, pre-populate common HTTP response fields with defaults: + * • Status_Code = "200" + * • Connection, Content_Encoding, Content_Type, Server, Set_Cookie, Transfer_Encoding + * all begin as empty. + * - After construction, call computeCalculateFields() to build m_Data from defaults. + */ +class CustomHTTPResponse : public CustomHTTP { +public: + /** + * @brief Constructor: initialize standard response fields with default values. + */ + CustomHTTPResponse() + { + // Set default response-line field + addField("Status_Code", "200"); + // Add common response headers (initially empty) + addField("Connection", ""); + addField("Content_Encoding", ""); + addField("Content_Type", ""); + addField("Server", ""); + addField("Set_Cookie", ""); + addField("Transfer_Encoding", ""); + computeCalculateFields(); + } + + /** + * @brief Human-readable name override. 
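Editorial aside (not part of this changeset): with the defaults above, only Method and Path are non-empty, so the serialized request buffer holds just "GET / " until further fields are filled in. A usage sketch; it assumes header.cpp is reachable under the include path shown and reads the buffer back through pcpp::Layer::getData(), and the Host value is a sample assumption.

#include "plugins/header.cpp"  // CustomHTTPRequest, as added in this changeset
#include <iostream>
#include <string>

int main() {
    CustomHTTPRequest req;                 // defaults: Method=GET, Path=/, remaining fields empty
    req.addField("Host", "example.com");   // any name/value pair can be appended
    req.computeCalculateFields();          // re-serialize after changing fields

    // Buffer now holds: "GET / Host: example.com\r\n"
    std::string serialized(reinterpret_cast<const char*>(req.getData()), req.getHeaderLen());
    std::cout << serialized << std::endl;
    return 0;
}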
+ */ + virtual std::string toString() const override { + return "HTTP Response Layer"; + } + + /** + * @brief Return the layer name for printing or debugging. + */ + std::string getName() const { + return "HTTP Response"; + } +}; + +/* + * ============================================================================= + * Custom DNS Layers (Question Record, Resource Record, Header) + * ============================================================================= + */ + +/** + * @class CustomDNSQR + * @brief Represents a custom DNS Question Record (QNAME, QTYPE, QCLASS). + * + * Responsibilities: + * - Store qname (domain name string) and qtype (DNS query type, e.g., A=1). + * - On computeCalculateFields(), encode qname in DNS wire format: + * • Split qname by '.', write each label as length-prefixed. + * • Terminate with a null byte. + * • Append QTYPE (2 bytes, network byte order) and QCLASS=1 (IN, 2 bytes). + * - Provide getHeaderLen(), parseNextLayer() (no next), toString(), getOsiModelLayer(). + */ +class CustomDNSQR : public pcpp::Layer { +public: + std::string qname; ///< Domain name (e.g., "example.com") + uint16_t qtype; ///< Query type (e.g., 1 for A) + + /** + * @brief Constructor: store qname and qtype (default “none”, type=1). + */ + CustomDNSQR(const std::string& name = "none", uint16_t type = 1) + : Layer(), qname(name), qtype(type) + { + m_Protocol = pcpp::UnknownProtocol; + m_DataLen = 0; + m_Data = nullptr; + } + + /** + * @brief Change the stored qname. + */ + void setQName(const std::string& name) { qname = name; } + + /** + * @brief Change the stored qtype. + */ + void setQType(uint16_t type) { qtype = type; } + + /** + * @brief Serialize the DNS question into m_Data in standard DNS “QNAME” + “QTYPE QCLASS” format. + * + * Workflow: + * 1. Split `qname` on '.' into labels vector. + * 2. Compute `nameLen` = sum(labelLengths + 1) + 1 for final null byte. + * 3. m_DataLen = nameLen + 4 (2 bytes QTYPE + 2 bytes QCLASS). + * 4. Allocate m_Data = new uint8_t[m_DataLen]. + * 5. Write each label with length prefix, then a final 0 byte. + * 6. Write QTYPE in network byte order, then QCLASS=1 (IN) in network order. + */ + void computeCalculateFields() override { + // 1) Split qname into labels by '.' + std::vector labels; + std::istringstream iss(qname); + std::string label; + while (std::getline(iss, label, '.')) { + labels.push_back(label); + } + + // 2) Compute length of name portion + size_t nameLen = 1; // final null + for (auto& lbl : labels) { + nameLen += lbl.size() + 1; // length byte + label + } + + // 3) Total length = nameLen + 2 bytes QTYPE + 2 bytes QCLASS + m_DataLen = nameLen + 4; + m_Data = new uint8_t[m_DataLen]; + + // 4) Fill QNAME + size_t offset = 0; + for (auto& lbl : labels) { + m_Data[offset++] = static_cast(lbl.size()); // label length + memcpy(m_Data + offset, lbl.data(), lbl.size()); + offset += lbl.size(); + } + m_Data[offset++] = 0; // end of QNAME + + // 5) Write QTYPE in network order + uint16_t netType = htons(qtype); + memcpy(m_Data + offset, &netType, sizeof(netType)); + offset += sizeof(netType); + + // 6) Write QCLASS = 1 (IN) in network order + uint16_t qclass = htons(1); + memcpy(m_Data + offset, &qclass, sizeof(qclass)); + } + + /** + * @brief Return the question-record length. + */ + size_t getHeaderLen() const override { + return m_DataLen; + } + + /** + * @brief No next layer to parse (application layer terminates). + */ + void parseNextLayer() override { + m_NextLayer = nullptr; + } + + /** + * @brief Human-readable layer name. 
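Editorial aside (not part of this changeset): the QNAME encoding above is easiest to verify on a concrete name. "example.com" becomes 0x07 'example' 0x03 'com' 0x00 (13 bytes), and with QTYPE and QCLASS appended the question record is 17 bytes. A usage sketch, assuming header.cpp is reachable under the include path shown.

#include "plugins/header.cpp"  // CustomDNSQR, as added in this changeset
#include <iostream>

int main() {
    CustomDNSQR question("example.com", 1);  // QTYPE 1 = A record
    question.computeCalculateFields();

    // QNAME: 1+7 ("example") + 1+3 ("com") + 1 (terminator) = 13 bytes,
    // plus 2 bytes QTYPE and 2 bytes QCLASS = 17 bytes total.
    std::cout << question.getHeaderLen() << std::endl;  // 17
    return 0;
}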
+ */ + std::string toString() const override { + return "Custom DNS Question Record"; + } + + /** + * @brief Indicate this is an application-layer object (DNS). + */ + pcpp::OsiModelLayer getOsiModelLayer() const override { + return pcpp::OsiModelApplicationLayer; + } + + /** + * @brief Return a short name for this custom layer. + */ + std::string getName() const { + return "DNS Question Record"; + } +}; + +/** + * @class CustomDNSRR + * @brief Represents a custom DNS Resource Record (RR) without RDATA. + * + * Responsibilities: + * - Store rrname (domain name), type (e.g., A=1), and TTL (time to live). + * - On computeCalculateFields(), serialize the RR in DNS wire format: + * • NAME (labels + null terminator). + * • TYPE (2 bytes), CLASS=1 (2 bytes), TTL (4 bytes), RDLENGTH=0 (2 bytes). + * - Provide getHeaderLen(), parseNextLayer(), toString(), getOsiModelLayer(). + */ +class CustomDNSRR : public pcpp::Layer { +public: + std::string rrname; ///< Domain name for the RR (e.g., "example.com") + uint16_t type; ///< RR type (e.g., 1 for A record) + uint32_t ttl; ///< Time to Live for this record + + /** + * @brief Constructor: store rrname, type, and TTL. No m_Data allocated yet. + */ + CustomDNSRR(const std::string& name = "", + uint16_t t = 1, + uint32_t timeToLive = 0) + : Layer(), rrname(name), type(t), ttl(timeToLive) + { + m_Protocol = pcpp::UnknownProtocol; + m_DataLen = 0; + m_Data = nullptr; + } + + /** + * @brief Update the stored RR name. + */ + void setRRName(const std::string& name) { rrname = name; } + + /** + * @brief Update the stored type. + */ + void setType(uint16_t t) { type = t; } + + /** + * @brief Update the stored TTL. + */ + void setTTL(uint32_t timeToLive) { ttl = timeToLive; } + + /** + * @brief Serialize the RR into m_Data in DNS wire format (excluding RDATA). + * + * Workflow: + * 1. Split rrname into labels by '.'. + * 2. Compute nameLen = sum(label lengths + 1) + 1 for terminating null. + * 3. Total length = nameLen + 2(TYPE) + 2(CLASS) + 4(TTL) + 2(RDLENGTH=0). + * 4. Allocate m_Data of size m_DataLen. + * 5. Write NAME (labels + null). + * 6. Write TYPE in network order. + * 7. Write CLASS=1 (IN) in network order. + * 8. Write TTL in network order. + * 9. Write RDLENGTH=0 in network order. 
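Editorial aside (not part of this changeset): for the resource-record layout above, the same name yields 13 NAME bytes, so with TYPE, CLASS, TTL and the empty RDLENGTH the record is 23 bytes. A usage sketch, assuming header.cpp is reachable under the include path shown; the TTL value is a sample assumption.

#include "plugins/header.cpp"  // CustomDNSRR, as added in this changeset
#include <iostream>

int main() {
    CustomDNSRR record("example.com", 1, 3600);  // A record, TTL = 3600 s
    record.computeCalculateFields();

    // NAME (13) + TYPE (2) + CLASS (2) + TTL (4) + RDLENGTH (2) = 23 bytes
    std::cout << record.getHeaderLen() << std::endl;  // 23
    return 0;
}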
+ */ + void computeCalculateFields() override { + // 1) Split rrname into labels + std::vector labels; + std::istringstream iss(rrname); + std::string label; + while (std::getline(iss, label, '.')) { + labels.push_back(label); + } + + // 2) Compute length of NAME portion + size_t nameLen = 1; // final null + for (auto& lbl : labels) { + nameLen += lbl.size() + 1; + } + + // 3) Total length = nameLen + 2 bytes TYPE + 2 bytes CLASS + 4 bytes TTL + 2 bytes RDLENGTH + m_DataLen = nameLen + 10; + m_Data = new uint8_t[m_DataLen]; + + // 4) Fill NAME + size_t offset = 0; + for (auto& lbl : labels) { + m_Data[offset++] = static_cast(lbl.size()); + memcpy(m_Data + offset, lbl.data(), lbl.size()); + offset += lbl.size(); + } + m_Data[offset++] = 0; // end of NAME + + // 5) Write TYPE + uint16_t netType = htons(type); + memcpy(m_Data + offset, &netType, sizeof(netType)); + offset += sizeof(netType); + + // 6) Write CLASS = 1 (IN) + uint16_t qclass = htons(1); + memcpy(m_Data + offset, &qclass, sizeof(qclass)); + offset += sizeof(qclass); + + // 7) Write TTL + uint32_t netTTL = htonl(ttl); + memcpy(m_Data + offset, &netTTL, sizeof(netTTL)); + offset += sizeof(netTTL); + + // 8) Write RDLENGTH = 0 (no RDATA) + uint16_t rdlen = htons(0); + memcpy(m_Data + offset, &rdlen, sizeof(rdlen)); + } + + /** + * @brief Return the length of the RR (header only, no RDATA). + */ + size_t getHeaderLen() const override { + return m_DataLen; + } + + /** + * @brief No next layer is parsed (application layer). + */ + void parseNextLayer() override { + m_NextLayer = nullptr; + } + + /** + * @brief Human-readable layer name. + */ + std::string toString() const override { + return "Custom DNS Resource Record"; + } + + /** + * @brief Indicate this is an application-layer DNS RR. + */ + pcpp::OsiModelLayer getOsiModelLayer() const override { + return pcpp::OsiModelApplicationLayer; + } + + /** + * @brief Return short layer name. + */ + std::string getName() const { + return "DNS Resource Record"; + } +}; + +/** + * @class CustomDNS + * @brief Represents a custom DNS header (12 bytes) without any question or record bodies. + * + * Responsibilities: + * - Store DNS header fields (QR, opcode, AA, TC, RD, RA, Z, AD, CD, rcode). + * - Store section counts: qdCount (questions), anCount (answers), + * nsCount (authority), arCount (additional). + * - On computeCalculateFields(), build a 12-byte DNS header in wire format: + * • ID (2 bytes) = 0. + * • Flags (2 bytes) assembled from individual bits. + * • QDCOUNT, ANCOUNT, NSCOUNT, ARCOUNT (each 2 bytes, network order). + * - Provide getHeaderLen(), parseNextLayer(), toString(), getOsiModelLayer(). 
+ */ +class CustomDNS : public pcpp::Layer { +public: + // DNS FLAGS (1 bit each except opcode (4 bits) and rcode (4 bits)) + bool qr = false; ///< Query (0) or Response (1) + uint8_t opcode = 0; ///< 4-bit operation code + bool aa = false; ///< Authoritative Answer + bool tc = false; ///< Truncation + bool rd = true; ///< Recursion Desired (default true) + bool ra = false; ///< Recursion Available + bool z = false; ///< Reserved (zero) + bool ad = false; ///< Authenticated Data + bool cd = false; ///< Checking Disabled + uint8_t rcode = 0; ///< 4-bit response code + + // SECTION COUNTS + uint16_t qdCount = 0; ///< Number of question records + uint16_t anCount = 0; ///< Number of answer records + uint16_t nsCount = 0; ///< Number of authority records + uint16_t arCount = 0; ///< Number of additional records + + /** + * @brief Constructor: initialize all DNS header fields to defaults (zero except RD). + */ + CustomDNS() { + m_Protocol = pcpp::UnknownProtocol; + m_DataLen = 0; + m_Data = nullptr; + } + + /** + * @brief Serialize the 12-byte DNS header into m_Data in wire format. + * + * Workflow: + * 1. m_DataLen = 12. + * 2. Allocate m_Data = new uint8_t[12]. + * 3. Write ID = 0 (2 bytes, network order). + * 4. Build flags field (16 bits) by shifting individual bits into correct positions: + * - bit15: QR + * - bits14-11: opcode + * - bit10: AA + * - bit9: TC + * - bit8: RD + * - bit7: RA + * - bit6: Z + * - bit5: AD + * - bit4: CD + * - bits3-0: rcode + * Write flags as 2 bytes in network order. + * 5. Write qdCount, anCount, nsCount, arCount each as 2 bytes in network order. + */ + void computeCalculateFields() override { + m_DataLen = 12; + m_Data = new uint8_t[m_DataLen]; + size_t offset = 0; + + // 1) ID = 0 + uint16_t id = 0; + uint16_t netId = htons(id); + memcpy(m_Data + offset, &netId, sizeof(netId)); + offset += sizeof(netId); + + // 2) Build flags in a 16-bit field + uint16_t flags = 0; + flags |= (qr ? 1u << 15 : 0); + flags |= (static_cast(opcode & 0xF) << 11); + flags |= (aa ? 1u << 10 : 0); + flags |= (tc ? 1u << 9 : 0); + flags |= (rd ? 1u << 8 : 0); + flags |= (ra ? 1u << 7 : 0); + flags |= (z ? 1u << 6 : 0); + flags |= (ad ? 1u << 5 : 0); + flags |= (cd ? 1u << 4 : 0); + flags |= (static_cast(rcode & 0xF)); + uint16_t netFlags = htons(flags); + memcpy(m_Data + offset, &netFlags, sizeof(netFlags)); + offset += sizeof(netFlags); + + // 3) Write section counts (network order) + auto writeCount = [&](uint16_t val) { + uint16_t netVal = htons(val); + memcpy(m_Data + offset, &netVal, sizeof(netVal)); + offset += sizeof(netVal); + }; + writeCount(qdCount); + writeCount(anCount); + writeCount(nsCount); + writeCount(arCount); + } + + /** + * @brief Return the fixed header length (12 bytes). + */ + size_t getHeaderLen() const override { + return m_DataLen; + } + + /** + * @brief No next layer is parsed (DNS header stands alone; questions/records inserted separately). + */ + void parseNextLayer() override { + m_NextLayer = nullptr; + } + + /** + * @brief Human-readable layer name for debugging. + */ + std::string toString() const override { + return "Custom DNS Header"; + } + + /** + * @brief Indicate application-layer (DNS). + */ + pcpp::OsiModelLayer getOsiModelLayer() const override { + return pcpp::OsiModelApplicationLayer; + } + + /** + * @brief Return short layer name. 
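Editorial aside (not part of this changeset): the flag assembly above reproduces the standard DNS header flag word. A small standalone sketch of the same bit packing for two common cases; packDnsFlags is an assumed helper name introduced for illustration.

#include <cstdint>
#include <cstdio>

// Same bit positions as CustomDNS::computeCalculateFields().
uint16_t packDnsFlags(bool qr, uint8_t opcode, bool aa, bool tc, bool rd,
                      bool ra, bool z, bool ad, bool cd, uint8_t rcode) {
    uint16_t flags = 0;
    flags |= (qr ? 1u << 15 : 0);
    flags |= static_cast<uint16_t>((opcode & 0xF) << 11);
    flags |= (aa ? 1u << 10 : 0);
    flags |= (tc ? 1u << 9 : 0);
    flags |= (rd ? 1u << 8 : 0);
    flags |= (ra ? 1u << 7 : 0);
    flags |= (z  ? 1u << 6 : 0);
    flags |= (ad ? 1u << 5 : 0);
    flags |= (cd ? 1u << 4 : 0);
    flags |= (rcode & 0xF);
    return flags;
}

int main() {
    // Standard recursive query (only RD set): 0x0100
    std::printf("0x%04x\n", packDnsFlags(false, 0, false, false, true, false, false, false, false, 0));
    // Typical response (QR, RD, RA set): 0x8180
    std::printf("0x%04x\n", packDnsFlags(true, 0, false, false, true, true, false, false, false, 0));
    return 0;
}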
+ */ + std::string getName() const { + return "DNS"; + } +}; \ No newline at end of file diff --git a/heiFIP/runner.cpp b/heiFIP/runner.cpp new file mode 100644 index 0000000..f2af68a --- /dev/null +++ b/heiFIP/runner.cpp @@ -0,0 +1,66 @@ +#pragma once + +#include +#include + +#include "extractor.cpp" + +// Runner class orchestrates multithreaded image generation using FIPExtractor +class Runner { +private: + int thread_number; // Number of threads available for processing + FIPExtractor extractor; // Core packet/image extraction logic + +public: + // Constructor initializes thread count and extractor + Runner(int thread_number) : thread_number(thread_number), extractor() {} + + /** + * Generates an image from a pcap file and saves it to the output directory. + * + * @param output_name Name of the saved image file (without extension) + * @param input_file Path to the input .pcap file + * @param output_dir Directory to store the resulting image + * @param args Variant type containing parameters specific to the selected ImageType + * @param preprocessing_type Type of preprocessing to apply (e.g., NONE or HEADER-based) + * @param image_type The selected image generation mode + * @param min_image_dim Minimum dimension of the generated image + * @param max_image_dim Maximum dimension of the generated image + * @param min_packets_per_flow Lower bound on packets per flow for inclusion + * @param max_packets_per_flow Upper bound on packets per flow for inclusion + * @param remove_duplicates Whether to remove duplicate packets/flows before processing + */ + void create_image( + const std::string& output_name, + const std::string& input_file, + const std::string& output_dir, + const ImageArgsVariant& args, + PacketProcessorType preprocessing_type = PacketProcessorType::NONE, + ImageType image_type = ImageType::PacketImage, + int min_image_dim = 0, + int max_image_dim = 0, + int min_packets_per_flow = 0, + int max_packets_per_flow = 0, + bool remove_duplicates = false + ) { + // Create an image matrix from the input .pcap file using provided arguments + UInt8Matrix img = extractor.createImageFromFile( + input_file, + args, + preprocessing_type, + image_type, + min_image_dim, + max_image_dim, + min_packets_per_flow, + max_packets_per_flow, + remove_duplicates + ); + + // Ensure output path is properly formed before saving + if (!output_dir.empty() && output_dir.back() == '/') { + extractor.save_image(img, output_dir + output_name); + } else { + extractor.save_image(img, output_dir + "/" + output_name); + } + } +}; \ No newline at end of file diff --git a/heifip/__init__.py b/heifip/__init__.py deleted file mode 100644 index fb8af61..0000000 --- a/heifip/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"], show_default=True) - -__author__ = "Stefan Machmeier" -__copyright__ = "Copyright 2023, heiFIP" -__credits__ = ["Manuel Trageser"] -__license__ = "EUPL" -__version__ = "1.1.1" -__maintainer__ = "Stefan Machmeier" -__email__ = "stefan.machmeier@uni-heidelberg.de" -__status__ = "Production" \ No newline at end of file diff --git a/heifip/cli.py b/heifip/cli.py deleted file mode 100644 index 0d3f047..0000000 --- a/heifip/cli.py +++ /dev/null @@ -1,360 +0,0 @@ -try: - import click -except ImportError: - raise ImportError( - "Please install Python dependencies: " "click, colorama (optional)." 
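Editorial aside (not part of this changeset): Runner::create_image above joins output_dir and output_name by checking for a trailing '/' by hand. std::filesystem::path handles separators automatically, including a trailing one; a hedged alternative sketch with sample paths follows.

#include <filesystem>
#include <iostream>
#include <string>

int main() {
    std::string output_dir  = "/tmp/images/";  // with or without a trailing slash
    std::string output_name = "flow_0001";

    // operator/ inserts a separator only where one is needed
    std::filesystem::path out = std::filesystem::path(output_dir) / output_name;
    std::cout << out.string() << std::endl;    // "/tmp/images/flow_0001"
    return 0;
}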
- ) - -from heifip import CONTEXT_SETTINGS, __version__ -from heifip.images.flow import FlowImage -from heifip.images.flow_tiled_auto import FlowImageTiledAuto -from heifip.images.flow_tiled_fixed import FlowImageTiledFixed -from heifip.images.markovchain import (MarkovTransitionMatrixFlow) -from heifip.images.packet import PacketImage -from heifip.layers import PacketProcessorType -from heifip.main import Runner - - -def add_options(options): - def _add_options(func): - for option in reversed(options): - func = option(func) - return func - - return _add_options - - -@click.version_option(version=__version__) -@click.group(context_settings=CONTEXT_SETTINGS) -def cli(): - click.secho("Starting FlowImageProcessor CLI") - - -_extract_options = [ - click.option( - "-w", - "--write", - "output_dir", - type=click.Path(), - required=True, - help="Destination file path, stores result.", - ), - click.option("-r", "--read", "input_dir", required=True, type=click.Path()), - click.option( - "-t", - "--threads", - "num_threads", - type=int, - default=4, - help="Number of parallel threads that can be used.", - ), - click.option( - "--preprocess", - "preprocessing_type", - default="NONE", - type=click.Choice( - list(map(lambda x: x.name, PacketProcessorType)), case_sensitive=False - ), - help="Applies a preprocessing to the input data:\n none: No preprocessing\n payload: Only payload data is used\n header: Preprocesses headers (DNS,HTTP,IP,IPv6,TCP,UDP supported) to remove some biasing data.", - ), - click.option( - "--min-im-dim", - "min_image_dim", - type=int, - default=0, - help="Minimum dim ouput images need to have, 0=No minimum dim.", - ), - click.option( - "--max-im-dim", - "max_image_dim", - type=int, - default=0, - help="Maximum dim ouput images can have, 0=No maximum dim.", - ), - click.option( - "--remove-duplicates", - "remove_duplicates", - is_flag=True, - default=False, - help="Within a single output folder belonging to a single input folder no duplicate images will be produced if two inputs lead to the same image.", - ), -] - -_flow_options = [ - click.option( - "--min-packets", - "min_packets_per_flow", - type=int, - default=0, - help="Minimum packets that a FlowImage needs to have, 0=No minimum packets per flow.", - ), - click.option( - "--max-packets", - "max_packets_per_flow", - type=int, - default=0, - help="Minimum packets that a FlowImage needs to have, 0=No minimum packets per flow.", - ), -] - -_image_options = [ - click.option( - "--dim", - "dim", - type=int, - default=8, - help="Dimension of the image.", - ), - click.option( - "--fill", - "fill", - type=int, - default=0, - help="Fills remaining parts of the array of the image. 
Important: value has to be between 0-255.", - ), -] - -_auto_dim_options = [ - click.option( - "--auto-dim", - "auto_dim", - is_flag=True, - default=False, - help="Automatically adjust size of image based on the length of the packet/s.", - ), -] - - -@cli.group(name="extract", context_settings={"show_default": True}) -def extract(): - click.secho("Extract FlowImageProcessor CLI") - - -@extract.command(name="packet") -@add_options(_extract_options) -@add_options(_flow_options) -@add_options(_image_options) -@add_options(_auto_dim_options) -def extract_packet_image( - input_dir, - output_dir, - num_threads, - preprocessing_type, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - dim, - fill, - auto_dim, -): - """Extracts each packet from PCAP file and converts it into a single image representation.""" - runner = Runner(num_threads) - runner.run( - input_dir, - output_dir, - preprocessing_type, - PacketImage, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - dim, - fill, - auto_dim, - ) - - -@extract.command(name="flow") -@add_options(_extract_options) -@add_options(_flow_options) -@add_options(_image_options) -@click.option( - "--append", - "append", - is_flag=True, - default=False, - help="", -) -def extract_flow_image( - input_dir, - output_dir, - num_threads, - preprocessing_type, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - dim, - fill, - append, -): - """Extracts a list of packets from PCAP file and converts it into an image. You can either append each packet or write each packet into a new line.""" - runner = Runner(num_threads) - runner.run( - input_dir, - output_dir, - preprocessing_type, - FlowImage, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - dim, - fill, - append, - ) - - -@extract.command(name="flow-tiled-fixed") -@add_options(_extract_options) -@add_options(_flow_options) -@add_options(_image_options) -@click.option( - "--cols", - "cols", - type=int, - default=4, - help="Number of columns for quadratic representation.", -) -def extract_flow_tiled_fixed_image( - input_dir, - output_dir, - num_threads, - preprocessing_type, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - dim, - fill, - cols, -): - """Extracts packets from PCAP file and converts all packets into a single quadratic image based on the number of columns. If more packets are given than the total size of cols*cols, only the first n given packets are used.""" - runner = Runner(num_threads) - runner.run( - input_dir, - output_dir, - preprocessing_type, - FlowImageTiledFixed, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - dim, - fill, - cols, - ) - - -@extract.command(name="flow-tiled-auto") -@add_options(_extract_options) -@add_options(_flow_options) -@add_options(_image_options) -@add_options(_auto_dim_options) -def extract_flow_tiled_fixed_image( - input_dir, - output_dir, - num_threads, - preprocessing_type, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - dim, - fill, - cols, -): - """Extracts packets from PCAP file and converts all packets into a single quadratic image. 
It adjust the size based on the total amount of packets.""" - runner = Runner(num_threads) - runner.run( - input_dir, - output_dir, - preprocessing_type, - FlowImageTiledAuto, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - dim, - fill, - cols, - ) - - -@extract.command(name="markov-flow") -@add_options(_extract_options) -@add_options(_flow_options) -@click.option( - "--cols", - "cols", - type=int, - default=4, - help="Number of columns for quadratic representation.", -) -def extract_markov_image( - input_dir, - output_dir, - num_threads, - preprocessing_type, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - cols -): - """Extracts packets from PCAP file and converts it into a quadractic Markov Transition Matrix.""" - runner = Runner(num_threads) - runner.run( - input_dir, - output_dir, - preprocessing_type, - MarkovTransitionMatrixFlow, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - cols - ) - -@extract.command(name="markov-packet") -@add_options(_extract_options) -@add_options(_flow_options) -def extract_markov_image( - input_dir, - output_dir, - num_threads, - preprocessing_type, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, -): - """Extracts packets from PCAP file and converts it into a quadractic Markov Transition Matrix.""" - runner = Runner(num_threads) - runner.run( - input_dir, - output_dir, - preprocessing_type, - MarkovTransitionMatrixFlow, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - ) - - -if __name__ == "__main__": - cli() diff --git a/heifip/exceptions/__init__.py b/heifip/exceptions/__init__.py deleted file mode 100644 index 18ea798..0000000 --- a/heifip/exceptions/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -__author__ = "Stefan Machmeier" -__copyright__ = "Copyright 2023, heiFIP" -__credits__ = ["Manuel Trageser"] -__license__ = "EUPL" -__version__ = "1.1.1" -__maintainer__ = "Stefan Machmeier" -__email__ = "stefan.machmeier@uni-heidelberg.de" -__status__ = "Production" - -class FIPWrongParameterException(Exception): - """Wrong Parameter passed""" - diff --git a/heifip/extractor.py b/heifip/extractor.py deleted file mode 100644 index 0b85b52..0000000 --- a/heifip/extractor.py +++ /dev/null @@ -1,198 +0,0 @@ -import os - -import numpy as np -from PIL import Image as PILImage -from scapy.all import Packet - -from heifip.exceptions import FIPWrongParameterException -from heifip.images import NetworkTrafficImage -from heifip.images.flow import FlowImage -from heifip.images.flow_tiled_auto import FlowImageTiledAuto -from heifip.images.flow_tiled_fixed import FlowImageTiledFixed -from heifip.images.markovchain import (MarkovTransitionMatrixFlow, - MarkovTransitionMatrixPacket) -from heifip.images.packet import PacketImage -from heifip.layers import PacketProcessor, PacketProcessorType -from heifip.layers.packet import FIPPacket - - -class FIPExtractor: - def __init__(self): - self.processor = PacketProcessor() - self.images_created = [] - - def verify(self, image, min_image_dim: int, max_image_dim: int, remove_duplicates: bool): - if image.shape[0] < min_image_dim or image.shape[1] < min_image_dim: - return False - - if max_image_dim != 0 and (max_image_dim < image.shape[0] or max_image_dim < image.shape[1]): - return False - - # if remove_duplicates: - # im_str = image.tobytes() - # if im_str in 
self.images_created: - # return False - # else: - # self.images_created.append(im_str) - - return True - - def create_image_from_file( - self, - input_file: str, - preprocessing_type: PacketProcessorType = PacketProcessorType.NONE, - image_type: NetworkTrafficImage = PacketImage, - min_image_dim: int = 0, - max_image_dim: int = 0, - min_packets_per_flow: int = 0, - max_packets_per_flow: int = 0, - remove_duplicates: bool = False, - *args - ): - - assert os.path.isfile(input_file) - - packets = self.processor.read_packets_file(input_file, preprocessing_type) - - images = self.__create_matrix( - packets, - preprocessing_type, - image_type, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - *args - ) - - return images - - def create_image_from_packet( - self, - packets: [FIPPacket], - preprocessing_type: PacketProcessorType = PacketProcessorType.NONE, - image_type: NetworkTrafficImage = PacketImage, - min_image_dim: int = 0, - max_image_dim: int = 0, - min_packets_per_flow: int = 0, - max_packets_per_flow: int = 0, - remove_duplicates: bool = False, - *args - ): - - packets = self.processor.read_packets_packet(packets, preprocessing_type) - - images = self.__create_matrix( - packets, - preprocessing_type, - image_type, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - *args - ) - - return images - - def __create_matrix( - self, - packets: [FIPPacket], - preprocessing_type: PacketProcessorType = PacketProcessorType.NONE, - image_type: NetworkTrafficImage = PacketImage, - min_image_dim: int = 0, - max_image_dim: int = 0, - min_packets_per_flow: int = 0, - max_packets_per_flow: int = 0, - remove_duplicates: bool = False, - *args - ): - images = [] - if image_type == FlowImage: - # when no file matches the preprocessing - if len(packets) == 0 or len(packets) < min_packets_per_flow: - return images - - # cut packets when too many are there - if max_packets_per_flow != 0 and len(packets) > max_packets_per_flow: - packets = packets[:max_packets_per_flow] - - image = FlowImage(packets, *args) - if self.verify(image.matrix, min_image_dim, max_image_dim, remove_duplicates): - images.append(image.matrix) - - elif image_type == FlowImageTiledFixed: - # when no file matches the preprocessing - if len(packets) == 0 or len(packets) < min_packets_per_flow: - return images - - # cut packets when too many are there - if max_packets_per_flow != 0 and len(packets) > max_packets_per_flow: - packets = packets[:max_packets_per_flow] - - image = FlowImageTiledFixed(packets, *args) - if self.verify(image.matrix, min_image_dim, max_image_dim, remove_duplicates): - images.append(image.matrix) - - elif image_type == FlowImageTiledAuto: - # when no file matches the preprocessing - if len(packets) == 0 or len(packets) < min_packets_per_flow: - return images - - # cut packets when too many are there - if max_packets_per_flow != 0 and len(packets) > max_packets_per_flow: - packets = packets[:max_packets_per_flow] - - image = FlowImageTiledAuto(packets, *args) - if self.verify(image.matrix, min_image_dim, max_image_dim, remove_duplicates): - images.append(image.matrix) - - elif image_type == PacketImage: - - for packet in packets: - image = PacketImage(packet, *args) - if self.verify(image.matrix, min_image_dim, max_image_dim, remove_duplicates): - images.append(image.matrix) - - elif image_type == MarkovTransitionMatrixFlow: - # when no file matches the preprocessing - if len(packets) == 0 or len(packets) < 
min_packets_per_flow: - return images - - # cut packets when too many are there - if max_packets_per_flow != 0 and len(packets) > max_packets_per_flow: - packets = packets[:max_packets_per_flow] - - image = MarkovTransitionMatrixFlow(packets, *args) - if self.verify(image.matrix, min_image_dim, max_image_dim, remove_duplicates): - images.append(image.matrix) - - elif image_type == MarkovTransitionMatrixPacket: - for packet in packets: - image = MarkovTransitionMatrixPacket(packet, *args) - if self.verify(image.matrix, min_image_dim, max_image_dim, remove_duplicates): - images.append(image.matrix) - else: - raise FIPWrongParameterException - - return images - - def save_image(self, img, output_dir): - pil_img = PILImage.fromarray(img) - if not os.path.exists(os.path.realpath(os.path.dirname(output_dir))): - try: - os.makedirs(os.path.realpath(os.path.dirname(output_dir))) - except: - pass - pil_img.save(f"{output_dir}_processed.png") - - def convert(self, img, target_type_min, target_type_max, target_type): - imin = img.min() - imax = img.max() - - a = (target_type_max - target_type_min) / (imax - imin) - b = target_type_max - a * imax - new_img = (a * img + b).astype(target_type) - return new_img \ No newline at end of file diff --git a/heifip/images/__init__.py b/heifip/images/__init__.py deleted file mode 100644 index 64d4f99..0000000 --- a/heifip/images/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from abc import ABC, abstractmethod - -__author__ = "Stefan Machmeier" -__copyright__ = "Copyright 2023, heiFIP" -__credits__ = ["Manuel Trageser"] -__license__ = "EUPL" -__version__ = "1.1.1" -__maintainer__ = "Stefan Machmeier" -__email__ = "stefan.machmeier@uni-heidelberg.de" -__status__ = "Production" - - -class NetworkTrafficImage(ABC): - def __init__(self, fill=0, dim=8) -> None: - self.fill = fill - self.dim = dim diff --git a/heifip/images/flow.py b/heifip/images/flow.py deleted file mode 100644 index e795236..0000000 --- a/heifip/images/flow.py +++ /dev/null @@ -1,49 +0,0 @@ -import binascii - -import numpy as np -from scapy.all import Packet, raw - -from heifip.images import NetworkTrafficImage - - -class FlowImage(NetworkTrafficImage): - def __init__( - self, - packets, - dim=16, - fill=0, - append=False, - ) -> None: - NetworkTrafficImage.__init__(self, fill, dim) - self.packets = packets - self.append = append - self.matrix, self.binaries = self.__get_matrix(self.dim, self.append, self.fill, self.packets) - del packets - - - def __get_matrix(self, dim: int, append: bool, fill: int, packets: [Packet]): - """ - Creates a matrix of a list of Scapy Packet. 
- """ - binaries = [] - for packet in self.packets: - # get Hex data - hexst = binascii.hexlify(raw(packet.packet)) - # Append octet as integer - binaries.append( - [int(hexst[i : i + 2], 16) for i in range(0, len(hexst), 2)] - ) - fh = None - # Append packets after another or write each packet in a row - if append: - fh = np.concatenate([np.array(xi) for xi in binaries]) - rn = len(fh) // dim + (len(fh) % dim > 0) - fh = np.pad(fh, (0, (rn * dim) - fh.shape[0]), 'constant') - fh = fh.reshape(rn, dim) - else: - length = max(map(len, binaries)) - fh = np.array([xi + [fill] * (length - len(xi)) for xi in binaries]) - - fh = np.uint8(fh) - - return fh, binaries diff --git a/heifip/images/flow_tiled_auto.py b/heifip/images/flow_tiled_auto.py deleted file mode 100644 index 4ce37bb..0000000 --- a/heifip/images/flow_tiled_auto.py +++ /dev/null @@ -1,88 +0,0 @@ -import binascii - -import numpy as np -from scapy.all import Packet, raw - -from heifip.images import NetworkTrafficImage - - -class FlowImageTiledAuto(NetworkTrafficImage): - def __init__( - self, - packets, - dim=16, - fill=0, - auto_dim=False, - ) -> None: - NetworkTrafficImage.__init__(self, fill, dim) - self.packets = packets - self.auto_dim = auto_dim - self.matrix, self.binaries = self.__get_matrix_tiled(self.fill, self.dim, self.auto_dim, packets) - del packets - - def __get_matrix_tiled(self, fill: int, dim: int, auto_dim: bool, packets: [Packet]): - """ - Creates a matrix of a list of Scapy Packet. - Packets are tiled into a quadratic representation. - """ - binaries = [] - for packet in self.packets: - # get Hex data - hexst = binascii.hexlify(raw(packet.packet)) - # Append octet as integer - binaries.append( - [int(hexst[i : i + 2], 16) for i in range(0, len(hexst), 2)] - ) - - length = max(map(len, binaries)) - # Get dim of packet, using auto_dim uses the largest packet as dim reference - if auto_dim: - dim = int(np.ceil(np.sqrt(length))) - - result = [] - for x in binaries: - x = x[: dim * dim] - x = np.array(x + [fill] * (dim * dim - len(x))) - x = x.reshape(dim, dim) - result.append(x) - - # Get size of total image - length_total = len(result) - dim_total = int(np.ceil(np.sqrt(length_total))) - # dim_total = 4 - - # Create tiled image - fh = self.__tile_images(result, dim_total, dim) - # Convert to int - fh = np.uint8(fh) - return fh, binaries - - def __tile_images(self, images, cols: int, dim: int): - """Tile images of same size to grid with given number of columns. 
- - Args: - images (collection of ndarrays) - cols (int): number of colums - - Returns: - ndarray: stitched image - """ - k = 0 - rows = [] - for i in range(0, cols): - row = None - for j in range(0, cols): - if len(images) > k: - im = images[k] - else: - im = np.zeros((dim, dim)) - - if row is None: - row = im - else: - row = np.concatenate((row, im), axis=1) - k += 1 - rows.append(row) - tiled = np.concatenate(rows) - - return tiled \ No newline at end of file diff --git a/heifip/images/flow_tiled_fixed.py b/heifip/images/flow_tiled_fixed.py deleted file mode 100644 index 615af56..0000000 --- a/heifip/images/flow_tiled_fixed.py +++ /dev/null @@ -1,79 +0,0 @@ -import binascii - -import numpy as np -from scapy.all import Packet, raw - -from heifip.images import NetworkTrafficImage - - -class FlowImageTiledFixed(NetworkTrafficImage): - def __init__( - self, - packets, - dim=16, - fill=0, - cols=3, - ) -> None: - NetworkTrafficImage.__init__(self, fill, dim) - self.packets = packets - self.cols = cols - self.matrix, self.binaries = self.__get_matrix_tiled(self.fill, self.dim, self.cols, packets) - del packets - - def __get_matrix_tiled(self, fill: int, dim: int, cols: int, packets: [Packet]): - """ - Creates a matrix of a list of Scapy Packet. - Packets are tiled into a quadratic representation. - """ - binaries = [] - for packet in self.packets: - # get Hex data - hexst = binascii.hexlify(raw(packet.packet)) - # Append octet as integer - binaries.append( - [int(hexst[i : i + 2], 16) for i in range(0, len(hexst), 2)] - ) - - result = [] - for x in binaries: - x = x[: dim * dim] - x = np.array(x + [fill] * (dim * dim - len(x))) - x = x.reshape(dim, dim) - result.append(x) - - # Create tiled image - fh = self.__tile_images(result, cols, dim) - # Convert to int - fh = np.uint8(fh) - return fh, binaries - - def __tile_images(self, images, cols: int, dim: int): - """Tile images of same size to grid with given number of columns. 
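# A compact sketch of the grid stitching performed by __tile_images above:
# per-packet tiles are laid out row by row and missing cells are filled with
# zero tiles; the tile contents here are made up.
import numpy as np

def tile(images, cols, dim):
    pad = [np.zeros((dim, dim), dtype=images[0].dtype)] * (cols * cols - len(images))
    cells = list(images) + pad
    rows = [np.concatenate(cells[i * cols:(i + 1) * cols], axis=1) for i in range(cols)]
    return np.concatenate(rows, axis=0)

tiles = [np.full((4, 4), i, dtype=np.uint8) for i in range(5)]
print(tile(tiles, cols=3, dim=4).shape)    # (12, 12): a 3x3 grid of 4x4 tiles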
- - Args: - images (collection of ndarrays) - cols (int): number of colums - - Returns: - ndarray: stitched image - """ - k = 0 - rows = [] - for i in range(0, cols): - row = None - for j in range(0, cols): - if len(images) > k: - im = images[k] - else: - im = np.zeros((dim, dim)) - - if row is None: - row = im - else: - row = np.concatenate((row, im), axis=1) - k += 1 - - rows.append(row) - tiled = np.concatenate(rows) - - return tiled \ No newline at end of file diff --git a/heifip/images/markovchain.py b/heifip/images/markovchain.py deleted file mode 100644 index ee83e99..0000000 --- a/heifip/images/markovchain.py +++ /dev/null @@ -1,97 +0,0 @@ -import numpy as np -from scapy.all import Packet - -from heifip.images import NetworkTrafficImage - - -class MarkovTransitionMatrix(NetworkTrafficImage): - def __init__( - self, - ) -> None: - NetworkTrafficImage.__init__(self) - - def bit_array(self, packet): - bytes_as_bits = ''.join(format(byte, '08b') for byte in bytes(packet.packet)) - transition = [] - for i in range(0, len(bytes_as_bits), 4): - transition.append(int(bytes_as_bits[i:i+4], 2)) - return transition - - def transition_matrix(self, transitions): - n = 16 - - M = [[0]*n for _ in range(n)] - - for (i,j) in zip(transitions,transitions[1:]): - M[i][j] += 1 - - #now convert to probabilities: - for row in M: - s = sum(row) - if s > 0: - row[:] = [f/s for f in row] - return M - -class MarkovTransitionMatrixFlow(MarkovTransitionMatrix): - def __init__( - self, - packets: [Packet], - cols: int = 4 - ) -> None: - MarkovTransitionMatrix.__init__(self) - - result = [] - for packet in packets: - transition = self.bit_array(packet) - m = self.transition_matrix(transition) - result.append(np.array(m)) - - # Create tiled image - fh = self.__tile_images(result, cols, 16) - - # Convert to int - self.matrix = fh - del packets - - def __tile_images(self, images, cols, dim): - """Tile images of same size to grid with given number of columns. 
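# A standalone sketch of the Markov-transition-matrix encoding defined above:
# packet bytes are split into 4-bit nibbles (states 0..15) and the 16x16 matrix
# of transition probabilities is derived from adjacent nibble pairs. The input
# bytes are invented for illustration.
import numpy as np

def nibble_transition_matrix(data: bytes) -> np.ndarray:
    states = []
    for byte in data:                       # two 4-bit states per byte
        states += [byte >> 4, byte & 0x0F]
    m = np.zeros((16, 16))
    for i, j in zip(states, states[1:]):    # count state-to-state transitions
        m[i, j] += 1
    row_sums = m.sum(axis=1, keepdims=True)
    return np.divide(m, row_sums, out=np.zeros_like(m), where=row_sums > 0)

print(nibble_transition_matrix(b"\x12\x34\x12").round(2))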
- - Args: - images (collection of ndarrays) - cols (int): number of colums - - Returns: - ndarray: stitched image - """ - k = 0 - rows = [] - for i in range(0, cols): - row = None - for j in range(0, cols): - if len(images) > k: - im = images[k] - else: - im = np.zeros((dim, dim)) - - if row is None: - row = im - else: - row = np.concatenate((row, im), axis=1) - k += 1 - rows.append(row) - tiled = np.concatenate(rows) - - return tiled - -class MarkovTransitionMatrixPacket(MarkovTransitionMatrix): - def __init__( - self, - packet: Packet, - ) -> None: - MarkovTransitionMatrix.__init__(self) - - transition = self.bit_array(packet) - m = self.transition_matrix(transition) - self.matrix = np.array(m) - - del packet diff --git a/heifip/images/packet.py b/heifip/images/packet.py deleted file mode 100644 index 15e7069..0000000 --- a/heifip/images/packet.py +++ /dev/null @@ -1,39 +0,0 @@ -import binascii - -import numpy as np -from scapy.all import Packet, raw - -from heifip.images import NetworkTrafficImage - - -class PacketImage(NetworkTrafficImage): - def __init__( - self, - packet: Packet, - dim=8, - fill=0, - auto_dim=False - ) -> None: - NetworkTrafficImage.__init__(self, fill, dim) - self.auto_dim = auto_dim - self.matrix, self.binaries = self.__get_matrix(self.dim, self.auto_dim, self.fill, packet) - - del packet - - def __get_matrix(self, dim: int, auto_dim: int, fill: int, packet: Packet): - # get Hex data - hexst = binascii.hexlify(raw(packet.packet)) - # Append octet as integer - binaries = [int(hexst[i: i + 2], 16) for i in range(0, len(hexst), 2)] - # Get min dim - length = len(binaries) - if auto_dim: - dim = int(np.ceil(np.sqrt(length))) - - # Create array and shape it to dim - fh = np.array(binaries + [fill] * (dim * dim - len(binaries))) - fh = fh[0:dim * dim].reshape(dim, dim) - - fh = np.uint8(fh) - - return fh, binaries diff --git a/heifip/layers/__init__.py b/heifip/layers/__init__.py deleted file mode 100644 index c9a2422..0000000 --- a/heifip/layers/__init__.py +++ /dev/null @@ -1,102 +0,0 @@ -import os -from enum import Enum, unique - -from scapy.all import (Packet,load_layer, - sniff, wrpcap) -from scapy.layers.dns import DNS -from scapy.layers.http import HTTP, HTTPRequest, HTTPResponse -from scapy.layers.inet import IP, TCP, UDP, Ether -from scapy.layers.inet6 import IPv6 - -from heifip.layers.dns import DNSPacket -from heifip.layers.http import (HTTPPacket, HTTPRequestPacket, - HTTPResponsePacket) -from heifip.layers.ip import IPPacket -from heifip.layers.packet import EtherPacket, FIPPacket, UnknownPacket -from heifip.layers.transport import TransportPacket - -__author__ = "Stefan Machmeier" -__copyright__ = "Copyright 2023, heiFIP" -__credits__ = ["Manuel Trageser"] -__license__ = "EUPL" -__version__ = "1.1.1" -__maintainer__ = "Stefan Machmeier" -__email__ = "stefan.machmeier@uni-heidelberg.de" -__status__ = "Production" - -SUPPORTED_HEADERS = [IP, IPv6, DNS, HTTPRequest, HTTPResponse, TCP, UDP] - - -@unique -class PacketProcessorType(Enum): - NONE = 1 - HEADER = 2 - - -class PacketProcessor: - def __init__( - self, - file_extension="pcap", - ) -> None: - self.hash_dict = set() - load_layer("tls") - - def write_packet(self) -> None: - # Write pcap - wrpcap(f"{self.filename}_converted.pcap", self.packets, append=True) - - def read_packets_file(self, file: str, preprocessing_type: PacketProcessorType) -> [FIPPacket]: - assert os.path.isfile(file) - - # Read PCAP file with Scapy - packets = [] - # TODO Only read max number of packets - pcap = sniff(offline=file, 
count=64) - for pkt in pcap: - # Start preprocessing for each packet - processed_packet = self.__preprocessing(pkt, preprocessing_type) - # TODO Run extract here to reduce amount of loops in code. Atm very inefficient for computation time and memory - # In case packet returns None - if processed_packet != None: - if not processed_packet.hash in self.hash_dict: - # TODO Turn off/on hash filtering - # self.hash_dict.add(processed_packet.hash) - packets.append(processed_packet) - return packets - - - def read_packets_packet(self, packet: [Packet], preprocessing_type: PacketProcessorType) -> [FIPPacket]: - # Read PCAP file with Scapy - packets = [] - for pkt in packet: - # Start preprocessing for each packet - processed_packet = self.__preprocessing(pkt, preprocessing_type) - # In case packet returns None - if processed_packet != None: - if not processed_packet.hash in self.hash_dict: - self.hash_dict.add(processed_packet.hash) - packets.append(processed_packet) - return packets - - def __preprocessing(self, packet: Packet, preprocessing_type: PacketProcessorType) -> FIPPacket: - fippacket = UnknownPacket(packet) - if HTTP in fippacket.layer_map: - if HTTPRequest in fippacket.layer_map: - fippacket = fippacket.convert(HTTPRequestPacket, fippacket) - elif HTTPResponse in fippacket.layer_map: - fippacket = fippacket.convert(HTTPResponsePacket, fippacket) - else: - fippacket = fippacket.convert(HTTPPacket, fippacket) - elif DNS in fippacket.layer_map: - fippacket = fippacket.convert(DNSPacket, fippacket) - elif TCP in fippacket.layer_map or UDP in fippacket.layer_map: - fippacket = fippacket.convert(TransportPacket, fippacket) - elif IP in fippacket.layer_map or IPv6 in fippacket.layer_map: - fippacket = fippacket.convert(IPPacket, fippacket) - elif Ether in fippacket.layer_map: - fippacket = fippacket.convert(EtherPacket, fippacket) - - if preprocessing_type == "HEADER": - fippacket.header_preprocessing() - - return fippacket diff --git a/heifip/layers/dns.py b/heifip/layers/dns.py deleted file mode 100644 index e71f2ab..0000000 --- a/heifip/layers/dns.py +++ /dev/null @@ -1,67 +0,0 @@ -from scapy.all import Packet -from scapy.layers.dns import DNS - -from heifip.layers.transport import TransportPacket -from heifip.plugins.header import CustomDNS, CustomDNSQR, CustomDNSRR - - -class DNSPacket(TransportPacket): - def __init__(self, packet: Packet, address_mapping={}, layer_map={}) -> None: - TransportPacket.__init__(self, packet, address_mapping, layer_map) - - def header_preprocessing(self): - # TODO: Fix issue with DNS processing - if self.packet[DNS].qd: - self.__header_preprocessing_message_type(self.packet, "qd") - if self.packet[DNS].an: - self.__header_preprocessing_message_type(self.packet, "an") - if self.packet[DNS].ns: - self.__header_preprocessing_message_type(self.packet, "ns") - if self.packet[DNS].ar: - self.__header_preprocessing_message_type(self.packet, "ar") - - layer_copy = self.packet[DNS] - - new_layer = CustomDNS( - qr=layer_copy.qr, - opcode=layer_copy.opcode, - aa=layer_copy.aa, - tc=layer_copy.tc, - rd=layer_copy.rd, - ra=layer_copy.ra, - z=layer_copy.z, - ad=layer_copy.ad, - cd=layer_copy.cd, - rcode=layer_copy.rcode, - qd=layer_copy.qd, - an=layer_copy.an, - ns=layer_copy.ns, - ar=layer_copy.ar, - ) - - self.packet[DNS] /= new_layer - - super().header_preprocessing() - - - def __header_preprocessing_message_type(self, packet: Packet, message_type: str): - message = getattr(packet[DNS], message_type) - if message_type == "qd": - new_message = 
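# A minimal sketch of the capped offline read done by
# PacketProcessor.read_packets_file above, together with a layer check that
# mirrors the DNS/TCP/UDP dispatch in __preprocessing; "example.pcap" is a
# placeholder path.
from scapy.all import sniff
from scapy.layers.dns import DNS
from scapy.layers.inet import TCP, UDP

packets = sniff(offline="example.pcap", count=64)   # read at most 64 packets
for pkt in packets:
    if DNS in pkt:
        kind = "dns"
    elif TCP in pkt or UDP in pkt:
        kind = "transport"
    else:
        kind = "other"
    print(kind, len(pkt))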
CustomDNSQR(qname=message.qname, qtype=message.qtype) - - while message:=message.payload: - new_message /= CustomDNSQR( - qname=message.qname, - qtype=message.qtype, - ) - else: - new_message = CustomDNSRR( - rrname=message.rrname, type=message.type - ) - - while message:=message.payload: - new_message /= CustomDNSRR( - rrname=message.rrname, type=message.type - ) - - setattr(packet[DNS], message_type, new_message) diff --git a/heifip/layers/http.py b/heifip/layers/http.py deleted file mode 100644 index 04e21cf..0000000 --- a/heifip/layers/http.py +++ /dev/null @@ -1,71 +0,0 @@ -import hashlib - -from scapy.all import Packet, Raw -from scapy.layers.http import HTTPRequest, HTTPResponse - -from heifip.layers.transport import TransportPacket -from heifip.plugins.header import (CustomHTTP_Request, - CustomHTTP_Response) - - -class HTTPPacket(TransportPacket): - def __init__(self, packet: Packet, address_mapping={}, layer_map={}): - TransportPacket.__init__(self, packet, address_mapping, layer_map) - def header_preprocessing(self): - super().header_preprocessing() - - -class HTTPRequestPacket(HTTPPacket): - def __init__(self, packet: Packet, address_mapping={}, layer_map={}): - HTTPPacket.__init__(self, packet, address_mapping, layer_map) - self.hash = hashlib.md5(f"{self.packet[HTTPRequest].Path},{self.packet[HTTPRequest].Method},{self.packet[HTTPRequest].Accept}".encode('utf-8')).hexdigest() - if Raw in self.layer_map: - self.packet[HTTPRequest].remove_payload() - - def header_preprocessing(self): - layer_copy = self.packet[HTTPRequest] - layer_copy = CustomHTTP_Request( - Method=layer_copy.Method, - Path=layer_copy.Path, - User_Agent=layer_copy.User_Agent, - Content_Type=layer_copy.Content_Type, - Connection=layer_copy.Connection, - Accept=layer_copy.Accept, - Accept_Charset=layer_copy.Accept_Charset, - Cookie=layer_copy.Cookie, - TE=layer_copy.TE, - ) - - if not self.packet[HTTPRequest].payload is None: - layer_copy.payload = self.packet[HTTPRequest].payload - - self.packet[HTTPRequest] = layer_copy - - super().header_preprocessing() - - -class HTTPResponsePacket(HTTPPacket): - def __init__(self, packet: Packet, address_mapping={}, layer_map={}): - HTTPPacket.__init__(self, packet, address_mapping, layer_map) - self.hash = hashlib.md5(f"{self.packet[HTTPResponse].Server},{self.packet[HTTPResponse].Status_Code},{self.packet[HTTPResponse].Connection}".encode('utf-8')).hexdigest() - if Raw in self.layer_map: - self.packet[HTTPResponse].remove_payload() - - def header_preprocessing(self): - layer_copy = self.packet[HTTPResponse] - layer_copy = CustomHTTP_Response( - Status_Code=layer_copy.Status_Code, - Server=layer_copy.Server, - Content_Type=layer_copy.Content_Type, - Connection=layer_copy.Connection, - Content_Encoding=layer_copy.Content_Encoding, - Set_Cookie=layer_copy.Set_Cookie, - Transfer_Encoding=layer_copy.Transfer_Encoding, - ) - - if self.packet[HTTPResponse].payload != None: - layer_copy.payload = self.packet[HTTPResponse].payload - - self.packet[HTTPResponse] = layer_copy - - super().header_preprocessing() diff --git a/heifip/layers/ip.py b/heifip/layers/ip.py deleted file mode 100644 index bac8c96..0000000 --- a/heifip/layers/ip.py +++ /dev/null @@ -1,100 +0,0 @@ -import hashlib - -from scapy.all import Packet, RandIP, RandIP6, Raw -from scapy.layers.http import HTTP -from scapy.layers.inet import IP, TCP, UDP -from scapy.layers.inet6 import IPv6 -from scapy.layers.tls.all import TLS - -from heifip.layers.packet import EtherPacket -from heifip.plugins.header import CustomIP, 
CustomIPv6 - - -class IPPacket(EtherPacket): - def __init__(self, packet: Packet, address_mapping={}, layer_map={}): - EtherPacket.__init__(self, packet, address_mapping, layer_map) - if IP in self.layer_map: - self.__filter_ipv4() - self.hash = hashlib.md5(f"{self.packet[IP].version},{self.packet[IP].flags},{self.packet[IP].proto}".encode('utf-8')).hexdigest() - if TLS in self.layer_map and not (TCP in self.layer_map or UDP in self.layer_map): - self.packet[IP].remove_payload() - if Raw in self.layer_map and not (TCP in self.layer_map or UDP in self.layer_map or HTTP in self.layer_map): - self.packet[IP].remove_payload() - elif IPv6 in self.layer_map: - self.__filter_ipv6() - self.hash = hashlib.md5(f"{self.packet[IPv6].version},{self.packet[IPv6].tc},{self.packet[IPv6].hlim}".encode('utf-8')).hexdigest() - if TLS in self.layer_map and not (TCP in self.layer_map or UDP in self.layer_map): - self.packet[IPv6].remove_payload() - if Raw in self.layer_map and not (TCP in self.layer_map or UDP in self.layer_map or HTTP in self.layer_map): - self.packet[IPv6].remove_payload() - - def __filter_ipv4(self): - previous_src = self.packet[IP].src - previous_dst = self.packet[IP].dst - - if previous_src in self.address_mapping: - new_src = self.address_mapping[previous_src] - else: - new_src = RandIP()._fix() - self.address_mapping[previous_src] = new_src - - if previous_dst in self.address_mapping: - new_dst = self.address_mapping[previous_dst] - else: - new_dst = RandIP()._fix() - self.address_mapping[previous_dst] = new_dst - - self.packet[IP].src = new_src - self.packet[IP].dst = new_dst - - def header_preprocessing(self): - if IP in self.layer_map: - layer_copy = self.packet[IP] - layer_copy = self.header_preprocessing_ipv4(layer_copy) - if self.packet[IP].payload != None: - layer_copy.payload = self.packet[IP].payload - self.packet[IP] = layer_copy - if IPv6 in self.layer_map: - layer_copy = self.packet[IPv6] - layer_copy = self.header_preprocessing_ipv6(layer_copy) - if self.packet[IPv6].payload != None: - layer_copy.payload = self.packet[IPv6].payload - self.packet[IPv6] = layer_copy - - super().header_preprocessing() - - def header_preprocessing_ipv4(self, layer_copy: Packet): - return CustomIP( - version=layer_copy.version, - tos=layer_copy.tos, - ttl=layer_copy.ttl, - flags=layer_copy.flags, - proto=layer_copy.proto, - ) - - def __filter_ipv6(self): - previous_src = self.packet[IPv6].src - previous_dst = self.packet[IPv6].dst - - if previous_src in self.address_mapping: - new_src = self.address_mapping[previous_src] - else: - new_src = RandIP6()._fix() - self.address_mapping[previous_src] = new_src - - if previous_dst in self.address_mapping: - new_dst = self.address_mapping[previous_dst] - else: - new_dst = RandIP6()._fix() - self.address_mapping[previous_dst] = new_dst - - self.packet[IPv6].src = new_src - self.packet[IPv6].dst = new_dst - - def header_preprocessing_ipv6(self, layer_copy: Packet): - return CustomIPv6( - version=layer_copy.version, - tc=layer_copy.tc, - nh=layer_copy.nh, - hlim=layer_copy.hlim, - ) diff --git a/heifip/layers/packet.py b/heifip/layers/packet.py deleted file mode 100644 index 7633c8f..0000000 --- a/heifip/layers/packet.py +++ /dev/null @@ -1,63 +0,0 @@ -import hashlib - -from scapy.all import RandMAC -from scapy.layers.inet import Ether - - -class FIPPacket: - def __init__(self, packet, address_mapping={}, layer_map={}): - self.address_mapping = address_mapping - self.packet = packet - self.hash = hashlib.md5().hexdigest() - - if layer_map == {}: - 
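# A sketch of the consistent address randomisation used by
# IPPacket.__filter_ipv4 above: every original address is replaced by one
# randomly drawn address that stays stable across the capture. The sample
# addresses are documentation prefixes, not real traffic.
from scapy.all import RandIP
from scapy.layers.inet import IP

mapping = {}

def anonymise(pkt):
    for field in ("src", "dst"):
        original = getattr(pkt[IP], field)
        if original not in mapping:
            mapping[original] = RandIP()._fix()   # draw one stable replacement
        setattr(pkt[IP], field, mapping[original])
    return pkt

p = anonymise(IP(src="192.0.2.1", dst="198.51.100.7"))
print(p.src, p.dst, mapping)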
self.layer_map = self.__get_layers() - - def __get_layers(self): - layer_map = dict() - layers = self.packet.layers() - for layer_class in layers: - layer_map[layer_class] = 1 - return layer_map - - def convert(self, packet_type, packet): - return packet_type(packet.packet, packet.address_mapping, packet.layer_map) - - def header_preprocessing(self): - pass - -class UnknownPacket(FIPPacket): - def __init__(self, packet, address_mapping={}, layer_map={}): - FIPPacket.__init__(self, packet, address_mapping, layer_map) - - def header_preprocessing(self): - super().header_preprocessing() - -class EtherPacket(FIPPacket): - def __init__(self, packet, address_mapping={}, layer_map={}): - FIPPacket.__init__(self, packet, layer_map, address_mapping) - - if Ether in self.layer_map: - self.__filter() - - def __filter(self): - previous_src = self.packet[Ether].src - previous_dst = self.packet[Ether].dst - - if previous_src in self.address_mapping: - new_src = self.address_mapping[previous_src] - else: - new_src = RandMAC()._fix() - self.address_mapping[previous_src] = new_src - - if previous_dst in self.address_mapping: - new_dst = self.address_mapping[previous_dst] - else: - new_dst = RandMAC()._fix() - self.address_mapping[previous_dst] = new_dst - - self.packet[Ether].src = new_src - self.packet[Ether].dst = new_dst - - def header_preprocessing(self): - super().header_preprocessing() \ No newline at end of file diff --git a/heifip/layers/ssh.py b/heifip/layers/ssh.py deleted file mode 100644 index 01581e1..0000000 --- a/heifip/layers/ssh.py +++ /dev/null @@ -1,11 +0,0 @@ -from scapy.all import Packet - -from heifip.layers.transport import TransportPacket - - -class SSHPacketProcessor(TransportPacket): - def __init__(self, packet: Packet, address_mapping={}, layer_map={}): - TransportPacket.__init__(self, packet, address_mapping, layer_map) - - def header_preprocessing(self): - super().header_preprocessing() \ No newline at end of file diff --git a/heifip/layers/transport.py b/heifip/layers/transport.py deleted file mode 100644 index 0f9ecef..0000000 --- a/heifip/layers/transport.py +++ /dev/null @@ -1,50 +0,0 @@ -import hashlib - -from scapy.all import Packet, Raw -from scapy.layers.http import HTTP -from scapy.layers.inet import TCP, UDP -from scapy.layers.tls.all import TLS - -from heifip.layers.ip import IPPacket -from heifip.plugins.header import CustomTCP, CustomUDP - - -class TransportPacket(IPPacket): - def __init__(self, packet: Packet, address_mapping={}, layer_map={}): - IPPacket.__init__(self, packet, address_mapping, layer_map) - if TCP in self.layer_map: - self.hash = hashlib.md5(f"{self.packet[TCP].flags},{self.packet[TCP].options}".encode('utf-8')).hexdigest() - if TLS in self.layer_map: - self.packet[TCP].remove_payload() - if Raw in self.layer_map and not HTTP in self.layer_map: - self.packet[TCP].remove_payload() - elif UDP in self.layer_map: - self.hash = hashlib.md5(f"{self.packet[UDP].name}".encode('utf-8')).hexdigest() - if TLS in self.layer_map: - self.packet[UDP].remove_payload() - if Raw in self.layer_map and not HTTP in self.layer_map: - self.packet[UDP].remove_payload() - - - def header_preprocessing(self): - if TCP in self.layer_map: - layer_copy = self.packet[TCP] - layer_copy = self.header_preprocessing_tcp(layer_copy) - if self.packet[TCP].payload != None: - layer_copy.payload = self.packet[TCP].payload - self.packet[TCP] = layer_copy - - if UDP in self.layer_map: - layer_copy = self.packet[UDP] - layer_copy = self.header_preprocessing_udp(layer_copy) - if 
self.packet[UDP].payload != None: - layer_copy.payload = self.packet[UDP].payload - self.packet[UDP] = layer_copy - - super().header_preprocessing() - - def header_preprocessing_tcp(self, layer_copy: Packet): - return CustomTCP(flags=layer_copy.flags, options=layer_copy.options) - - def header_preprocessing_udp(self, layer_copy: Packet): - return CustomUDP() diff --git a/heifip/main.py b/heifip/main.py deleted file mode 100644 index 2670213..0000000 --- a/heifip/main.py +++ /dev/null @@ -1,85 +0,0 @@ -import asyncio -import fnmatch -import glob -import logging -import os -from os.path import dirname, realpath -from queue import Queue -from threading import Thread - -from PIL import Image as PILImage -from tqdm import tqdm - -from heifip.extractor import FIPExtractor -from heifip.images.flow import FlowImage - -import pickle - - -class Runner: - def __init__(self, thread_number) -> None: - self.thread_number = thread_number - self.extractor = FIPExtractor() - - def create_image( - self, - input_file: str, - output_dir: str, - pbar, - *args - ): - imgs = self.extractor.create_image_from_file(input_file, *args) - pbar.update(1) - for img in imgs: - self.extractor.save_image(img, output_dir) - - def start_process( - self, - file_queue, - pbar, - *args, - ): - while not file_queue.empty(): - input_file, output_dir = file_queue.get() - self.create_image( - input_file, - output_dir, - pbar, - *args, - ) - file_queue.task_done() - - def run( - self, - input_dir: str, - output_dir: str, - *args - ): - - # Get all executable files in input directory and add them into queue - file_queue = Queue() - total_files = 0 - for root, dirnames, filenames in os.walk(input_dir): - for filename in fnmatch.filter(filenames, "*.pcap"): - match = os.path.join(root, filename) - sub_dir = match.replace(input_dir, "") - file_queue.put((match, f"{output_dir}/{sub_dir}")) - total_files += 1 - - # Start thread - pbar = tqdm(total=total_files) - for _ in range(self.thread_number): - thread = Thread( - target=self.start_process, - args=( - file_queue, - pbar, - *args - ), - ) - thread.daemon = True - thread.start() - file_queue.join() - pbar.close() - # with open('hashes_pkt.pkl', 'wb') as f: - # pickle.dump(self.extractor.processor.hash_dict, f) diff --git a/heifip/plugins/__init__.py b/heifip/plugins/__init__.py deleted file mode 100644 index 799770e..0000000 --- a/heifip/plugins/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from . 
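# A stripped-down sketch of the Queue/Thread fan-out used by Runner.run above;
# the worker only prints its item (a stand-in for create_image), and the file
# names are invented.
from queue import Queue
from threading import Thread

def worker(q: Queue):
    while not q.empty():
        item = q.get()
        print("processing", item)
        q.task_done()

queue = Queue()
for name in ["a.pcap", "b.pcap", "c.pcap"]:
    queue.put(name)

for _ in range(2):                          # two daemon workers, as in Runner.run
    Thread(target=worker, args=(queue,), daemon=True).start()
queue.join()                                # block until every item is processed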
import header - -__author__ = "Stefan Machmeier" -__copyright__ = "Copyright 2023, heiFIP" -__credits__ = ["Manuel Trageser"] -__license__ = "EUPL" -__version__ = "1.1.1" -__maintainer__ = "Stefan Machmeier" -__email__ = "stefan.machmeier@uni-heidelberg.de" -__status__ = "Production" \ No newline at end of file diff --git a/heifip/plugins/header.py b/heifip/plugins/header.py deleted file mode 100644 index e9c2297..0000000 --- a/heifip/plugins/header.py +++ /dev/null @@ -1,123 +0,0 @@ -from scapy.all import (IP_PROTOS, BitEnumField, BitField, - ByteEnumField, ByteField, DNSQRField, DNSRRField, - FlagsField, IntField, Packet, ShortEnumField, - StrField, TCPOptionsField, XByteField) -from scapy.layers.dns import DNSStrField, InheritOriginDNSStrPacket, dnstypes -from scapy.layers.inet6 import ipv6nh - - -class CustomIP(Packet): - name = "IP" - fields_desc = [ - BitField("version", 4, 4), - FlagsField("flags", 0, 4, ["R", "DF", "MF"]), # normally 3 bits last bit will always be 0 - XByteField("tos", 0), - ByteField("ttl", 64), - ByteEnumField("proto", 0, IP_PROTOS), - ] - -class CustomIPv6(Packet): - name = "IPv6" - fields_desc = [ - BitField("version", 6, 8), # normally 4 bits, last 4bits will always be 0 - BitField("tc", 0, 8), - ByteEnumField("nh", 59, ipv6nh), - ByteField("hlim", 64) - ] - -class CustomTCP(Packet): - name = "TCP" - fields_desc = [ - FlagsField("flags", 0x2, 16, "FSRPAUECN"), - TCPOptionsField("options", "") - ] - -class CustomUDP(Packet): - name = "UDP" - fields_desc = [] - -class CustomHTTP(Packet): - def self_build(self): - p = b"" - - for f in self.fields_desc: - val = self.getfieldval(f.name) - #when Value is not set - if not val: - continue - if f.name not in ['Method', 'Path', 'Status_Code']: - val = bytes((f.name).encode()) + b": " + bytes(val) - if f.name in ['Method', 'Path', 'Status_Code']: - seperator = b' ' - else: - seperator = b'\r\n' - - p = f.addfield(self, p, val + seperator) - - return p - - -class CustomHTTP_Request(CustomHTTP): - name = "HTTP Request" - fields_desc = [ - StrField("Method", "GET"), - StrField("Path", "/"), - StrField("User_Agent", None), - StrField("Content_Type", None), - StrField("Connection", None), - StrField("Accept", None), - StrField("Accept_Charset", None), - StrField("Accept_Encoding", None), - StrField("Cookie", None), - StrField("TE", None) - ] - -class CustomHTTP_Response(CustomHTTP): - name = "HTTP Response" - fields_desc = [ - StrField("Status_Code", "200"), - StrField("Connection", None), - StrField("Content_Encoding", None), - StrField("Content_Type", None), - StrField("Server", None), - StrField("Set_Cookie", None), - StrField("Transfer_Encoding", None) - ] - -class CustomDNSQR(InheritOriginDNSStrPacket): - name = "DNS Question Record" - show_indent = 0 - fields_desc = [ - DNSStrField("qname", "none"), - ShortEnumField("qtype", 1, dnstypes) - ] - -class CustomDNSRR(InheritOriginDNSStrPacket): - name = "DNS Resource Record" - show_indent = 0 - fields_desc = [ - DNSStrField("rrname", ""), - ShortEnumField("type", 1, dnstypes), - IntField("ttl", 0) - ] - -class CustomDNS(Packet): - name = "DNS" - fields_desc = [ - BitField("qr", 0, 1), - BitEnumField("opcode", 0, 4, {0: "QUERY", 1: "IQUERY", 2: "STATUS"}), - BitField("aa", 0, 1), - BitField("tc", 0, 1), - BitField("rd", 1, 1), - BitField("ra", 0, 1), - BitField("z", 0, 1), - BitField("ad", 0, 1), - BitField("cd", 0, 1), - BitEnumField("rcode", 0, 4, {0: "ok", 1: "format-error", - 2: "server-failure", 3: "name-error", - 4: "not-implemented", 5: "refused"}), - DNSQRField("qd", 
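# A small sketch of how the reduced header layers above are declared: a scapy
# Packet subclass whose fields_desc keeps only a few header fields. The class
# name and field selection are illustrative, not part of heiFIP.
from scapy.all import IP_PROTOS, BitField, ByteEnumField, ByteField, FlagsField, Packet

class MinimalIP(Packet):
    name = "MinimalIP"
    fields_desc = [
        BitField("version", 4, 4),
        FlagsField("flags", 0, 4, ["R", "DF", "MF"]),   # 4 bits, so the byte stays aligned
        ByteField("ttl", 64),
        ByteEnumField("proto", 0, IP_PROTOS),
    ]

pkt = MinimalIP(ttl=32, proto=6)
print(bytes(pkt).hex())    # the serialised, reduced header
pkt.show()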
"", None), - DNSRRField("an", "", None), - DNSRRField("ns", "", None), - DNSRRField("ar", "", None), - ] diff --git a/heifip/protocols/__init__.py b/heifip/protocols/__init__.py deleted file mode 100644 index 815c5a3..0000000 --- a/heifip/protocols/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -__author__ = "Stefan Machmeier" -__copyright__ = "Copyright 2023, heiFIP" -__credits__ = ["Manuel Trageser"] -__license__ = "EUPL" -__version__ = "1.1.1" -__maintainer__ = "Stefan Machmeier" -__email__ = "stefan.machmeier@uni-heidelberg.de" -__status__ = "Production" - -from ssh import SSH \ No newline at end of file diff --git a/heifip/protocols/ssh.py b/heifip/protocols/ssh.py deleted file mode 100644 index 4de9f13..0000000 --- a/heifip/protocols/ssh.py +++ /dev/null @@ -1,291 +0,0 @@ - -import hashlib -import os - -from scapy.all import * -from scapy.all import TCP - - -class StrCustomTerminatorField(StrField): - __slots__ = ["remain", "terminator", "consume_terminator"] - - def __init__(self, name, default, fmt="H", remain=0, terminator="\x00\x00", consume_terminator=True): - super().__init__(name, default, fmt, remain) - - self.terminator = terminator - self.consume_terminator = consume_terminator - - def addfield(self, pkt, s, val): - return s+self.i2m(pkt, val)+self.terminator - - def getfield(self, pkt, s): - l = s.find(self.terminator) - if l < 0: - # XXX terminator not found - return "", s - if self.consume_terminator: - return s[l+len(self.terminator):], self.m2i(pkt, s[:l]) - return s[l:], self.m2i(pkt, s[:l]) - - def randval(self): - return RandTermString(RandNum(0, 1200), self.terminator) - - -class HintField(StrField): - def __init__(self, name, default, fmt="H", remain=0): - super().__init__(name, default, fmt, remain) - - def i2len(self, pkt, i): - return 0 - - def i2m(self, pkt, x): - return '' - - -class DynamicStrField(Field): - __slots__ = [ - "name", - "fmt", - "default", - "sz", - "owners", - "struct", - "remain", - "adjust" - ] - - def __init__(self, name, default, fmt="H", remain=0, adjust=lambda pkt, x: x): - - super().__init__(name, default, fmt) - - self.remain = remain - - self.adjust = adjust - - def i2len(self, pkt, i): - return len(i) - - def i2m(self, pkt, x): - if x is None: - x = "" - elif type(x) is not str: - x = str(x) - - x = self.adjust(pkt, x) - return x - - def addfield(self, pkt, s, val): - return s+self.i2m(pkt, val) - - def getfield(self, pkt, s): - if self.remain == 0: - return "", self.m2i(pkt, s) - else: - return s[-self.remain:], self.m2i(pkt, s[:-self.remain]) - - def randval(self): - return RandBin(RandNum(0, 1200)) - - -class BLenField(LenField): - __slots__ = ["adjust", "numbytes", "length_of", "count_of"] - - def __init__(self, name, default, fmt="I", adjust=lambda pkt, x: x, numbytes=None, length_of=None, count_of=None): - self.name = name - self.adjust = adjust - self.numbytes = numbytes - self.length_of = length_of - self.count_of = count_of - super().__init__(name, default, fmt) - - if fmt[0] in "@=<>!": - self.fmt = fmt - else: - self.fmt = "!"+fmt - self.default = self.any2i(None, default) - self.sz = struct.calcsize(self.fmt) if not numbytes else numbytes - self.owners = [] - - def addfield(self, pkt, s, val): - """Add an internal value to a string""" - pack = struct.pack(self.fmt, self.i2m(pkt, val)) - if self.numbytes: - pack = pack[len(pack)-self.numbytes:] - return s+pack - - def getfield(self, pkt, s): - """Extract an internal value from a string""" - upack_data = s[:self.sz] - # prepend struct.calcsize()-len(data) bytes to satisfy 
struct.unpack - upack_data = '\x00'*(struct.calcsize(self.fmt)-self.sz) + upack_data - - return s[self.sz:], self.m2i(pkt, struct.unpack(self.fmt, upack_data)[0]) - - def i2m(self, pkt, x): - if x is None: - if not (self.length_of or self.count_of): - x = len(pkt.payload) - x = self.adjust(pkt, x) - return x - - if self.length_of is not None: - fld, fval = pkt.getfield_and_val(self.length_of) - f = fld.i2len(pkt, fval) - else: - fld, fval = pkt.getfield_and_val(self.count_of) - f = fld.i2count(pkt, fval) - x = self.adjust(pkt, f) - return x - - -class XBLenField(BLenField): - def i2repr(self, pkt, x): - return lhex(self.i2h(pkt, x)) - - -class XLenField(LenField): - def i2repr(self, pkt, x): - return lhex(self.i2h(pkt, x)) - - -class XFieldLenField(FieldLenField): - def i2repr(self, pkt, x): - return lhex(self.i2h(pkt, x)) - - -SSH_MESSAGE_TYPES = {0x01: "disconnect", - 0x14: "kex_init", - 0x15: "new_keys", - 0xff: "unknown"} -SSH_TYPE_BOOL = {0x00: True, - 0xff: False} - -SSH_ALGO_CIPHERS = "none,aes128-ctr,aes192-ctr,aes256-ctr,arcfour256,arcfour128,aes128-gcm@openssh.com,aes256-gcm@openssh.com,aes128-cbc,3des-cbc,blowfish-cbc,cast128-cbc,aes192-cbc,aes256-cbc,arcfour".split( - ",") -SSH_ALGO_HMACS = "none,hmac-md5-etm@openssh.com,hmac-sha1-etm@openssh.com,umac-64-etm@openssh.com,umac-128-etm@openssh.com,hmac-sha2-256-etm@openssh.com,hmac-sha2-512-etm@openssh.com,hmac-ripemd160-etm@openssh.com,hmac-sha1-96-etm@openssh.com,hmac-md5-96-etm@openssh.com,hmac-md5,hmac-sha1,umac-64@openssh.com,umac-128@openssh.com,hmac-sha2-256,hmac-sha2-512,hmac-ripemd160,hmac-sha1-96,hmac-md5-96".split( - ",") -SSH_ALGO_KEX = "none,ecdh-sha2-nistp256,ecdh-sha2-nistp384,ecdh-sha2-nistp521,diffie-hellman-group-exchange-sha256,diffie-hellman-group-exchange-sha1,diffie-hellman-group14-sha1,diffie-hellman-group1-sha1".split( - ",") -SSH_ALGO_COMPRESSION = "none,zlib,zlib@openssh.com,none".split(",") -SSH_ALGO_HOSTKEY = "none,ecdsa-sha2-nistp521,ssh-rsa,ssh-dss".split(",") - - -def ssh_name_list(name, fmt="!I", numbytes=None, default=''): - return [XBLenField("%s_length" % name, None, length_of="%s" % name, fmt=fmt, numbytes=numbytes), - StrLenField("%s" % name, default, length_from=lambda x:getattr(x, "%s_length" % name)), ] - - -class SSHIdent(Packet): - name = "SSH Ident" - fields_desc = [ - StrField("ident", "SSH-2.0-ScapySSHLayer\r\n"), - ] - - -def ssh_calculate_mac(pkt, x): - if len(x): - return x - if not pkt.mac in ('md5', 'sha-1'): - return x - return getattr(hashlib, pkt.mac)(pkt.data).digest() - - -class SSHEncryptedPacket(Packet): - name = "SSH Encrypted Packet" - fields_desc = [ - StrField("data", None), - DynamicStrField("mac", None, adjust=ssh_calculate_mac), - HintField("encryption", None), - # HintField("mac",'md5'), - HintField("compression", None), - ] - - -class SSHMessage(Packet): - name = "SSH Message" - fields_desc = [ - XBLenField("length", None, fmt="!I", adjust=lambda pkt, - x: x+2 if pkt.lastlayer().haslayer(Raw) else x+2), - XBLenField("padding_length", None, fmt="!B", adjust=lambda pkt, x: len( - pkt.lastlayer()) if pkt.lastlayer().haslayer(Raw) else 0), - ByteEnumField("type", 0xff, SSH_MESSAGE_TYPES), - ] - - -class SSHKexInit(Packet): - name = "SSH Key Exchange Init" - fields_desc = [StrFixedLenField("cookie", os.urandom(16), 16), ] \ - + ssh_name_list("kex_algorithms", default=",".join(SSH_ALGO_KEX)) \ - + ssh_name_list("server_host_key_algorithms", default=",".join(SSH_ALGO_HOSTKEY)) \ - + ssh_name_list("encryption_algorithms_client_to_server", default=",".join(SSH_ALGO_CIPHERS)) 
\ - + ssh_name_list("encryption_algorithms_server_to_client", default=",".join(SSH_ALGO_CIPHERS)) \ - + ssh_name_list("mac_algorithms_client_to_server", default=",".join(SSH_ALGO_HMACS)) \ - + ssh_name_list("mac_algorithms_server_to_client", default=",".join(SSH_ALGO_HMACS)) \ - + ssh_name_list("compression_algorithms_client_to_server", default=",".join(SSH_ALGO_COMPRESSION)) \ - + ssh_name_list("compression_algorithms_server_to_client", default=",".join(SSH_ALGO_COMPRESSION)) \ - + ssh_name_list("languages_client_to_server") \ - + ssh_name_list("languages_server_to_client") \ - + [ - ByteEnumField("kex_first_packet_follows", 0x00, SSH_TYPE_BOOL), - IntField("reserved", 0x00), - ] - - -SSH_DISCONNECT_REASONS = {1: 'HOST_NOT_ALLOWED_TO_CONNECT', - 2: 'PROTOCOL_ERROR', - 3: 'KEY_EXCHANGE_FAILED', - 4: 'RESERVED', - 5: 'MAC_ERROR', - 6: 'COMPRESSION_ERROR', - 7: 'SERVICE_NOT_AVAILABLE', - 8: 'PROTOCOL_VERSION_NOT_SUPPORTED', - 9: 'HOST_KEY_NOT_VERIFIABLE', - 10: 'CONNECTION_LOST', - 11: 'BY_APPLICATION', - 12: 'TOO_MANY_CONNECTIONS', - 13: 'AUTH_CANCELLED_BY_USER', - 14: 'NO_MORE_AUTH_METHODS_AVAILABLE', - 15: 'ILLEGAL_USER_NAME', - } - - -class SSHDisconnect(Packet): - name = "SSH Disconnect" - fields_desc = [ - IntEnumField("reason", 0xff, SSH_DISCONNECT_REASONS), - StrCustomTerminatorField( - "description", "", terminator="\x00\x00\x00\x00"), - StrCustomTerminatorField( - "language", "", terminator="\x00", consume_terminator=False), - ] - - -class SSH(Packet): - name = "SSH" - - def is_ascii(s): - return all(ord(c) < 128 for c in s) - - def guess_payload_class(self, payload): - - try: - if payload.startswith("SSH-"): - return SSHIdent - - dummy = SSHMessage(payload, _internal=1) - if len(payload) <= dummy.length+4: - return SSHMessage - - except: - pass - return SSHEncryptedPacket - - -# bind magic -bind_layers(TCP, SSH, dport=22) -bind_layers(TCP, SSH, sport=22) - -bind_layers(SSH, SSHMessage) -bind_layers(SSHMessage, SSHKexInit, {'type': 0x14}) -bind_layers(SSHMessage, SSHDisconnect, {'type': 0x01}) -bind_layers(SSH, SSHEncryptedPacket) diff --git a/heifip/splitter.py b/heifip/splitter.py deleted file mode 100644 index f349ac7..0000000 --- a/heifip/splitter.py +++ /dev/null @@ -1,101 +0,0 @@ -# File from pcap-splitter project -# Copyright (C) 2019 Santiago Hernandez Ramos -# For more information about the project: https://github.com/shramos/pcap-splitter - -import subprocess - - -class PcapSplitter: - """Split a .pcap file into different files.""" - - def __init__(self, pcap_path, exefile_path="PcapSplitter"): - # Checks if the PcapSplitter executable exists in path - self._check_binary(exefile_path) - self._exefile_path = exefile_path - self._pcap_path = pcap_path - - def split_by_size(self, size_bytes, dest_path, pkts_bpf_filter=""): - """Split files by size in bytes.""" - args = (self._exefile_path, "-f", self._pcap_path, "-o", dest_path, - "-m", "file-size", "-p", str(size_bytes), "-i", pkts_bpf_filter) - # Execute the PcapSplitter binary - return self._execute(args).decode() - - def split_by_count(self, count_pkts, dest_path, pkts_bpf_filter=""): - """Split files by packet count.""" - args = (self._exefile_path, "-f", self._pcap_path, "-o", dest_path, - "-m", "packet-count", "-p", str(count_pkts), "-i", pkts_bpf_filter) - # Execute the PcapSplitter binary - return self._execute(args).decode() - - def split_by_client_ip(self, dest_path, pkts_bpf_filter=""): - """split files by client IP, meaning all connections with the same client - IP will be in the same file.""" - args = 
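# A hedged sketch of the bind_layers registration used above, which tells scapy
# to dissect traffic on a given port as a custom layer; the toy layer and port
# 2222 are illustrative only.
from scapy.all import Packet, StrField, bind_layers
from scapy.layers.inet import IP, TCP

class ToyProto(Packet):
    name = "ToyProto"
    fields_desc = [StrField("data", b"")]

bind_layers(TCP, ToyProto, dport=2222)      # dissect TCP/2222 payloads as ToyProto
bind_layers(TCP, ToyProto, sport=2222)

pkt = IP() / TCP(dport=2222) / b"hello"
rebuilt = IP(bytes(pkt))                    # re-dissect from raw bytes
print(rebuilt.haslayer(ToyProto), rebuilt[ToyProto].data)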
(self._exefile_path, "-f", self._pcap_path, "-o", dest_path, - "-m", "client-ip", "-i", pkts_bpf_filter) - # Execute the PcapSplitter binary - return self._execute(args).decode() - - def split_by_server_ip(self, dest_path, pkts_bpf_filter=""): - """split files by server IP, meaning all connections with the same server - IP will be in the same file.""" - args = (self._exefile_path, "-f", self._pcap_path, "-o", dest_path, - "-m", "server-ip", "-i", pkts_bpf_filter) - # Execute the PcapSplitter binary - return self._execute(args).decode() - - def split_by_server_port(self, dest_path, pkts_bpf_filter=""): - """split files by IP src and dst (2-tuple), meaning all connections with - the same IPs will be in the same file.""" - args = (self._exefile_path, "-f", self._pcap_path, "-o", dest_path, - "-m", "server-port", "-i", pkts_bpf_filter) - # Execute the PcapSplitter binary - return self._execute(args).decode() - - def split_by_ip_src_dst(self, dest_path, pkts_bpf_filter=""): - """split files by IP src and dst (2-tuple), meaning all connections with - the same IPs will be in the same file.""" - args = (self._exefile_path, "-f", self._pcap_path, "-o", dest_path, - "-m", "ip-src-dst", "-i", pkts_bpf_filter) - # Execute the PcapSplitter binary - return self._execute(args).decode() - - def split_by_session(self, dest_path, pkts_bpf_filter=""): - """split files by connection (5-tuple), meaning all packets of a - connection will be in the same file.""" - args = (self._exefile_path, "-f", self._pcap_path, "-o", dest_path, - "-m", "connection", "-i", pkts_bpf_filter) - # Execute the PcapSplitter binary - return self._execute(args).decode() - - def split_by_filter(self, bpf_filter, dest_path, pkts_bpf_filter=""): - """split file into two files: one that contains all packets matching the - given BPF filter (file #0) and one that contains the rest of the packets - (file #1).""" - args = (self._exefile_path, "-f", self._pcap_path, "-o", dest_path, - "-m", "bpf-filter", "-p", bpf_filter, "-i", pkts_bpf_filter) - # Execute the PcapSplitter binary - return self._execute(args).decode() - - def split_by_round_robin(self, n_files, dest_path, pkts_bpf_filter=""): - """split the file in a round-robin manner - each packet to a different - file.""" - args = (self._exefile_path, "-f", self._pcap_path, "-o", dest_path, - "-m", "round-robin", "-p", str(n_files), "-i", pkts_bpf_filter) - # Execute the PcapSplitter binary - return self._execute(args).decode() - - def _execute(self, args): - popen = subprocess.Popen(args, stdout=subprocess.PIPE) - popen.wait() - return popen.stdout.read() - - def _check_binary(self, exefile_path): - try: - subprocess.Popen(exefile_path, stdout=subprocess.PIPE) - except FileNotFoundError: - print("ERROR: PcapSplitter executable not found in the OS. Please \ - check that PcapPlusPlus is correctly installed and PcapSplitter \ - executable is in the path, or indicate the path of the PcapSplitter \ - executable by using the exefile_path parameter when instantiating \ - the PcapSplitter class.\n") \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 1f01c20..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,74 +0,0 @@ -[build-system] -requires = [ "setuptools>=62.0.0" ] -build-backend = "setuptools.build_meta" - -[project] -name = "heifip" -version = "1.1.1" -authors = [ - { name="Stefan Machmeier", email="stefan.machmeier@uni-heidelberg.de" }, -] -description = "A tool to convert network traffic into images for ML use cases." 
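# A short usage sketch for the PcapSplitter wrapper defined above; it assumes
# the PcapPlusPlus `PcapSplitter` binary is on the PATH and uses placeholder
# input/output paths.
from heifip.splitter import PcapSplitter

ps = PcapSplitter("capture.pcap")
# One output file per 5-tuple connection, filtering out DHCP/DHCPv6 noise.
print(ps.split_by_session("out/", pkts_bpf_filter="ip and (tcp or udp) and not (port 67 or port 68)"))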
-readme = "README.md" -license = { file="LICENSE" } -requires-python = ">=3.7" -keywords = [ "network" ] -classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: European Union Public Licence 1.2 (EUPL 1.2)", - "Operating System :: OS Independent", - "Operating System :: OS Independent", - 'Intended Audience :: Telecommunications Industry', - 'Intended Audience :: Information Technology', - 'Intended Audience :: System Administrators', - 'Intended Audience :: Science/Research', - 'Intended Audience :: Developers', - 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Topic :: Security', - 'Topic :: Internet :: Log Analysis', - 'Topic :: System :: Networking :: Monitoring', - 'Topic :: Scientific/Engineering :: Artificial Intelligence' -] - -[project.scripts] -fip = "heifip.cli:cli" - -[project.urls] -"Homepage" = "https://github.com/stefanDeveloper/heiFIP" -"Bug Tracker" = "https://github.com/stefanDeveloper/heiFIP/issues" - -[tool.setuptools.packages.find] -include = [ - "heifip*", -] -exclude = [ - "test*", - "assets*", -] - -[tool.poetry] -name = "heiFIP" -version = "1.1.1" -description = "A tool to convert network traffic into images for ML use cases." -license = "EUPL-1.2" -authors = ["Stefan Machmeier"] - -[tool.poetry.dependencies] -scapy = '>=2.5.0' -Pillow = '>=9.4.0' -numpy = '>=1.19.5' -click = '>=8.1.3' -tqdm = '>=4.64.1' -click-help-colors = '>=0.9.1' -pytest = '>=7.2.2' -cryptography = '>=1.0.0' - -[[tool.poetry_bumpversion.replacements]] -files = ["heifip/__init__.py", "heifip/exceptions/__init__.py", "heifip/images/__init__.py", "heifip/layers/__init__.py", "heifip/plugins/__init__.py", "heifip/protocols/__init__.py"] -search = '__version__ = "{current_version}"' -replace = '__version__ = "{new_version}"' \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 81f0c63..0000000 --- a/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -# pandas>=1.3.5; platform.python_implementation == 'CPython' -# pandas<=1.3.5; platform.python_implementation == 'PyPy' -scapy>=2.5.0 -Pillow>=9.4.0 -numpy>=1.19.5 -click>=8.1.3 -tqdm>=4.64.1 -click-help-colors>=0.9.1 -pytest>=7.2.2 -cryptography \ No newline at end of file diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 2f283bb..0000000 --- a/setup.cfg +++ /dev/null @@ -1,9 +0,0 @@ -[bdist_wheel] -universal = 0 - -[flake8] -exclude = docs -max-line-length = 255 - - -[aliases] \ No newline at end of file diff --git a/split.py b/split.py deleted file mode 100644 index 0c422f2..0000000 --- a/split.py +++ /dev/null @@ -1,49 +0,0 @@ -import fnmatch -import os -import re - -from heifip.splitter import PcapSplitter - -output_dir = "/home/smachmeier/data/test-data" -input_dir = "/home/smachmeier/data/test-pcaps" -orientation = { - "Cridex": "malware", - "Htbot": "malware", - "Geodo": "malware", - "Miuref": "malware", - "Neris": "malware", - "Nsis-ay": "malware", - "Shifu": "malware", - "Tinba": "malware", - "Virut": "malware", - "Weibo": "malware", - "Zeus": "malware", - "BitTorrent": "benign", - "Facetime": "benign", - "FTP": "benign", - "Gmail": "benign", - "MySQL": "benign", - "Outlook": "benign", - "Skype": "benign", - "SMB": "benign", - "WorldOfWarcraft": "benign" -} - -for root, dirnames, filenames in os.walk(input_dir): - for filename in 
fnmatch.filter(filenames, "*.pcap"): - match = os.path.join(root, filename) - sub_dir = match.replace(input_dir, "") - # sub_dir = re.sub("(-[0-9])?.pcap", "", sub_dir) - # sub_dir = sub_dir.replace("/", "") - # print(sub_dir.split('/')[1]) - # sub_dir = "malware" - # sub_dir = orientation[sub_dir] - if not os.path.exists(f"{output_dir}/{sub_dir}"): - try: - os.makedirs(f"{output_dir}/{sub_dir}") - except: - pass - ps = PcapSplitter(match) - # ps.split_by_count(10000, "/home/smachmeier/data/test-pcaps", pkts_bpf_filter="ip and (tcp or udp) and not (port 67 or port 68 or port 546 or port 547)") - ps.split_by_session(f"{output_dir}/{sub_dir.split('/')[1]}", pkts_bpf_filter="ip and (tcp or udp) and not (port 67 or port 68 or port 546 or port 547)") - diff --git a/tests.py b/tests.py deleted file mode 100644 index 778da16..0000000 --- a/tests.py +++ /dev/null @@ -1,170 +0,0 @@ -import fnmatch -import os - -import pytest -from scapy.all import rdpcap - -from heifip.extractor import FIPExtractor -from heifip.images.flow import FlowImage -from heifip.images.markovchain import (MarkovTransitionMatrixFlow, - MarkovTransitionMatrixPacket) -from heifip.images.packet import PacketImage -from heifip.layers import PacketProcessorType - -TEST_FOLDER = "./tests/pcaps" -OUTPUT_DIR = "./tests/images" - - -def get_files(): - assert os.path.exists(TEST_FOLDER) - packets = [] - for root, dirnames, filenames in os.walk(TEST_FOLDER): - for filename in fnmatch.filter(filenames, "*.pcap"): - match = os.path.join(root, filename) - sub_dir = match.replace(TEST_FOLDER, "") - packets.append(rdpcap(match)) - return packets[0:25] # Otherwise we break Python... - - -@pytest.mark.parametrize('packet', get_files()) -@pytest.mark.parametrize("append", [True, False]) -@pytest.mark.parametrize("fill", [0, 255]) -@pytest.mark.parametrize("dim", [4, 16]) -@pytest.mark.parametrize( - "min_packets_per_flow", [0, 4] -) -@pytest.mark.parametrize("max_image_dim", [0, 16]) -@pytest.mark.parametrize("min_image_dim", [0, 16]) -@pytest.mark.parametrize("remove_duplicates", [True, False]) -@pytest.mark.parametrize( - "preprocessing_type", [PacketProcessorType.HEADER, PacketProcessorType.NONE] -) -def test_extractor_flow( - packet, - append, - fill, - dim, - min_packets_per_flow, - max_image_dim, - min_image_dim, - remove_duplicates, - preprocessing_type, -): - extractor = FIPExtractor() - extractor.create_image_from_packet( - packet, - preprocessing_type, - FlowImage, - min_image_dim, - max_image_dim, - min_packets_per_flow, - 0, - remove_duplicates, - dim, - fill, - append, - ) - # TODO: Assert matrix... if functions worked fine - -@pytest.mark.parametrize('packet', get_files()) -@pytest.mark.parametrize( - "min_packets_per_flow", [0, 4] -) -@pytest.mark.parametrize( - "max_packets_per_flow", [0, 4] -) -@pytest.mark.parametrize("max_image_dim", [0, 16]) -@pytest.mark.parametrize("min_image_dim", [0, 16]) -@pytest.mark.parametrize("remove_duplicates", [True, False]) -@pytest.mark.parametrize( - "preprocessing_type", [PacketProcessorType.HEADER, PacketProcessorType.NONE] -) -def test_extractor_markovflow( - packet, - min_packets_per_flow, - max_packets_per_flow, - max_image_dim, - min_image_dim, - remove_duplicates, - preprocessing_type, -): - extractor = FIPExtractor() - extractor.create_image_from_packet( - packet, - preprocessing_type, - MarkovTransitionMatrixFlow, - min_image_dim, - max_image_dim, - min_packets_per_flow, - max_packets_per_flow, - remove_duplicates, - ) - # TODO: Assert matrix... 
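# A minimal sketch of the os.walk + fnmatch pattern used above (Runner.run,
# split.py and tests.get_files) to collect .pcap files; "./pcaps" is a
# placeholder directory.
import fnmatch
import os

def find_pcaps(input_dir: str):
    for root, _dirs, filenames in os.walk(input_dir):
        for filename in fnmatch.filter(filenames, "*.pcap"):
            yield os.path.join(root, filename)

for path in find_pcaps("./pcaps"):
    print(path)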
if functions worked fine - -@pytest.mark.parametrize('packet', get_files()) -@pytest.mark.parametrize( - "min_packets_per_flow", [0, 4] -) -@pytest.mark.parametrize("max_image_dim", [0, 16]) -@pytest.mark.parametrize("min_image_dim", [0, 16]) -@pytest.mark.parametrize("remove_duplicates", [True, False]) -@pytest.mark.parametrize( - "preprocessing_type", [PacketProcessorType.HEADER, PacketProcessorType.NONE] -) -def test_extractor_markovpacket( - packet, - min_packets_per_flow, - max_image_dim, - min_image_dim, - remove_duplicates, - preprocessing_type, -): - extractor = FIPExtractor() - extractor.create_image_from_packet( - packet, - preprocessing_type, - MarkovTransitionMatrixPacket, - min_image_dim, - max_image_dim, - min_packets_per_flow, - 0, - remove_duplicates, - ) - # TODO: Assert matrix... if functions worked fine - -@pytest.mark.parametrize('packet', get_files()) -@pytest.mark.parametrize("fill", [0, 255]) -@pytest.mark.parametrize("dim", [4, 16]) -@pytest.mark.parametrize("max_image_dim", [0, 16]) -@pytest.mark.parametrize("min_image_dim", [0, 16]) -@pytest.mark.parametrize("remove_duplicates", [True, False]) -@pytest.mark.parametrize( - "preprocessing_type", [PacketProcessorType.HEADER, PacketProcessorType.NONE] -) -def test_extractor_packet( - packet, - fill, - dim, - max_image_dim, - min_image_dim, - remove_duplicates, - preprocessing_type, -): - extractor = FIPExtractor() - extractor.create_image_from_packet( - packet, - preprocessing_type, - PacketImage, - min_image_dim, - max_image_dim, - 0, - 0, - remove_duplicates, - dim, - fill, - False - ) - # TODO: Assert matrix... if functions worked fine - -if __name__ == "__main__": - pytest.main()