Add Windows screen capture and encoding demo

.gitignore (vendored)

# Build directories
build/
out/
bin/
lib/
*.exe
*.dll

# Prerequisites
*.d

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Fortran module files
*.mod
*.smod

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app

# CMake
CMakeCache.txt
CMakeFiles/
cmake_install.cmake
Makefile
*.cmake
!CMakeLists.txt
build/

# IDE
# Visual Studio
.vs/
*.vcxproj
*.vcxproj.filters
*.vcxproj.user
*.sln
*.sdf
*.opensdf
*.suo

# Editors
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store

# Android
*.apk
*.ap_
*.dex
*.class
local.properties
.gradle/
build/
.idea/
*.iml

# Windows
Thumbs.db
ehthumbs.db
Desktop.ini

# Dependencies
third_party/
external/
vcpkg_installed/

# Logs
*.log

# Temporary files
*.tmp
*.temp
*.bak
*.backup

# FlatBuffers generated files (optional, if you want to track them)
# *.generated.h
# *.generated.cpp

demo/windows_sender/CMakeLists.txt (new file)

cmake_minimum_required(VERSION 3.10)
project(WindowsSenderDemo)

set(CMAKE_CXX_STANDARD 17)

# Windows specific
if(WIN32)
    add_definitions(-DUNICODE -D_UNICODE -DWIN32_LEAN_AND_MEAN -DNOMINMAX)
endif()

# Source files
set(SOURCES
    main.cpp
    ScreenCapture.cpp
    ScreenCapture.h
    VideoEncoder.cpp
    VideoEncoder.h
    NetworkSender.cpp
    NetworkSender.h
)

add_executable(WindowsSenderDemo ${SOURCES})

# Libraries
# Media Foundation libraries are removed as we switch to FFmpeg
# target_link_libraries(WindowsSenderDemo d3d11 dxgi mf mfplat mfuuid ws2_32 mfreadwrite)

# Find FFmpeg
# You might need to set FFMPEG_ROOT to your FFmpeg installation path
# e.g. cmake .. -DFFMPEG_ROOT="C:/ffmpeg"

find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h PATHS ${FFMPEG_ROOT}/include)
find_library(AVCODEC_LIBRARY avcodec PATHS ${FFMPEG_ROOT}/lib)
find_library(AVFORMAT_LIBRARY avformat PATHS ${FFMPEG_ROOT}/lib)
find_library(AVUTIL_LIBRARY avutil PATHS ${FFMPEG_ROOT}/lib)
find_library(SWSCALE_LIBRARY swscale PATHS ${FFMPEG_ROOT}/lib)

if (AVCODEC_INCLUDE_DIR AND AVCODEC_LIBRARY)
    include_directories(${AVCODEC_INCLUDE_DIR})
    message(STATUS "Found FFmpeg: ${AVCODEC_INCLUDE_DIR}")
else()
    message(FATAL_ERROR "FFmpeg not found! Please set FFMPEG_ROOT to your FFmpeg installation.")
endif()

target_link_libraries(WindowsSenderDemo
    d3d11
    dxgi
    ws2_32
    ${AVCODEC_LIBRARY}
    ${AVFORMAT_LIBRARY}
    ${AVUTIL_LIBRARY}
    ${SWSCALE_LIBRARY}
)
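
Because the FFmpeg lookup above is plain path probing rather than a find module, configuration normally needs `FFMPEG_ROOT` pointing at a prebuilt FFmpeg. A typical invocation (the path is illustrative, matching the comment in the file) is:

```cmd
cmake .. -DFFMPEG_ROOT="C:/ffmpeg"
cmake --build . --config Release
```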

demo/windows_sender/NetworkSender.cpp (new file)

#include "NetworkSender.h"
#include <iostream>
#include <algorithm>
#include <cstring>

struct TransportHeader {
    uint32_t frameId;
    uint16_t fragId;
    uint16_t totalFrags;
};

NetworkSender::NetworkSender() {
    WSADATA wsaData;
    WSAStartup(MAKEWORD(2, 2), &wsaData);
}

NetworkSender::~NetworkSender() {
    if (socket_ != INVALID_SOCKET) {
        closesocket(socket_);
    }
    WSACleanup();
}

bool NetworkSender::Initialize(const std::string& ip, int port) {
    socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (socket_ == INVALID_SOCKET) return false;

    // Enlarge the send buffer so bursts of fragments are less likely to be dropped
    int sndBuf = 1024 * 1024; // 1MB
    setsockopt(socket_, SOL_SOCKET, SO_SNDBUF, (char*)&sndBuf, sizeof(sndBuf));

    destAddr_.sin_family = AF_INET;
    destAddr_.sin_port = htons(port);
    inet_pton(AF_INET, ip.c_str(), &destAddr_.sin_addr);

    return true;
}

bool NetworkSender::SendFrame(const std::vector<uint8_t>& data, uint64_t timestamp, int width, int height, bool isKeyFrame) {
    // 1. Serialize the frame info header followed by the encoded data
    PacketHeader header;
    header.timestamp = timestamp;
    header.width = width;
    header.height = height;
    header.frameType = isKeyFrame ? 0 : 1;
    header.dataSize = (uint32_t)data.size();

    std::vector<uint8_t> buffer;
    buffer.resize(sizeof(PacketHeader) + data.size());
    memcpy(buffer.data(), &header, sizeof(PacketHeader));
    memcpy(buffer.data() + sizeof(PacketHeader), data.data(), data.size());

    // 2. Fragment and send
    const int MTU = 1400; // Safe MTU
    const int HEADER_SIZE = sizeof(TransportHeader);
    const int PAYLOAD_SIZE = MTU - HEADER_SIZE;

    size_t totalSize = buffer.size();
    size_t totalFrags = (totalSize + PAYLOAD_SIZE - 1) / PAYLOAD_SIZE;

    static uint32_t frameId = 0;
    frameId++;

    for (size_t i = 0; i < totalFrags; ++i) {
        TransportHeader transHeader;
        transHeader.frameId = frameId;
        transHeader.fragId = (uint16_t)i;
        transHeader.totalFrags = (uint16_t)totalFrags;

        size_t offset = i * PAYLOAD_SIZE;
        size_t chunkSize = std::min((size_t)PAYLOAD_SIZE, totalSize - offset);

        std::vector<uint8_t> packet;
        packet.resize(HEADER_SIZE + chunkSize);

        memcpy(packet.data(), &transHeader, HEADER_SIZE);
        memcpy(packet.data() + HEADER_SIZE, buffer.data() + offset, chunkSize);

        int sent = sendto(socket_, (const char*)packet.data(), (int)packet.size(), 0, (sockaddr*)&destAddr_, sizeof(destAddr_));
        if (sent < 0) {
            // Send failed; continue with the remaining fragments anyway.
        }
    }

    return true;
}
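
Both headers are copied onto the wire with raw `memcpy`, so sender and receiver must agree on struct layout and byte order (the demo sends host-endian, i.e. little-endian, integers). As written, neither struct has compiler padding; a compile-time guard makes that assumption explicit (a sketch, not part of the commit):

```cpp
// Wire-layout sanity checks implied by SendFrame's memcpy serialization:
// uint32+uint16+uint16 = 8 bytes, uint64+4*uint32 = 24 bytes, no padding.
static_assert(sizeof(TransportHeader) == 8, "unexpected padding in TransportHeader");
static_assert(sizeof(PacketHeader) == 24, "unexpected padding in PacketHeader");
```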

demo/windows_sender/NetworkSender.h (new file)

#pragma once

#include <winsock2.h>
#include <ws2tcpip.h>
#include <string>
#include <vector>
#include <cstdint>

#pragma comment(lib, "ws2_32.lib")

struct PacketHeader {
    uint64_t timestamp;
    uint32_t width;
    uint32_t height;
    uint32_t frameType; // 0=I, 1=P
    uint32_t dataSize;
};

class NetworkSender {
public:
    NetworkSender();
    ~NetworkSender();

    bool Initialize(const std::string& ip, int port);
    bool SendFrame(const std::vector<uint8_t>& data, uint64_t timestamp, int width, int height, bool isKeyFrame);

private:
    SOCKET socket_ = INVALID_SOCKET;
    sockaddr_in destAddr_{}; // zero-initialized so unused fields are not garbage
};

demo/windows_sender/README.md (new file)

# Windows Sender Demo

This is a simplified demo of the Windows host sender for DisplayFlow.
It demonstrates:
1. Screen capture using the Desktop Duplication API (DXGI).
2. H.264 encoding using FFmpeg (libavcodec; Media Foundation was dropped in favor of FFmpeg).
3. Network transmission over UDP.

## Prerequisites

- Windows 10/11
- Visual Studio 2019 or later (with C++ Desktop Development)
- CMake 3.10+
- FFmpeg development files (headers and import libraries)

## Build

1. Open a terminal (Developer Command Prompt for VS).
2. Navigate to this directory:
   ```cmd
   cd demo\windows_sender
   ```
3. Create a build directory:
   ```cmd
   mkdir build
   cd build
   ```
4. Configure and build, pointing `FFMPEG_ROOT` at your FFmpeg installation:
   ```cmd
   cmake .. -DFFMPEG_ROOT="C:/ffmpeg"
   cmake --build . --config Release
   ```

## Run

Run the executable with a target IP and port (both optional; they default to 127.0.0.1:8888):

```cmd
.\Release\WindowsSenderDemo.exe <TargetIP> <Port>
```

Example:
```cmd
.\Release\WindowsSenderDemo.exe 192.168.1.100 8888
```

## Implementation Details

- **ScreenCapture**: Uses `IDXGIOutputDuplication` to capture desktop frames.
- **VideoEncoder**: Uses FFmpeg (`libavcodec` with the `ultrafast` preset and `zerolatency` tune) to encode frames to H.264.
  - *Note*: Captured BGRA textures are copied to a CPU-readable staging texture and converted to YUV420P with `libswscale` before encoding.
- **NetworkSender**: Fragments the H.264 stream into UDP packets (MTU ~1400 bytes) and sends them to the target.

## Protocol

The demo uses a simple custom protocol for feasibility verification; a receiver sketch follows this list.
- **Transport header** (per packet, `TransportHeader`): `FrameID` (4B), `FragID` (2B), `TotalFrags` (2B).
- **Frame header** (per frame, `PacketHeader`, at the start of the reassembled payload): `Timestamp` (8B), `Width` (4B), `Height` (4B), `Type` (4B), `Size` (4B).
- **Payload**: H.264 NAL units.
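
The commit ships only the sender. For illustration, a receiver would reassemble fragments keyed by `FrameID` roughly like this (a minimal sketch under the protocol above, with error handling omitted; `OnDatagram` and `pending` are illustrative names, not shipped code):

```cpp
// Sketch: reassembling frames from UDP fragments on the receiver side.
#include <cstdint>
#include <cstring>
#include <map>
#include <vector>

struct TransportHeader { uint32_t frameId; uint16_t fragId; uint16_t totalFrags; };

// frameId -> one slot per fragment
std::map<uint32_t, std::vector<std::vector<uint8_t>>> pending;

void OnDatagram(const uint8_t* buf, size_t len) {
    TransportHeader h;
    std::memcpy(&h, buf, sizeof(h));
    auto& frags = pending[h.frameId];
    frags.resize(h.totalFrags);
    frags[h.fragId].assign(buf + sizeof(h), buf + len);
    // Once every slot is non-empty, concatenating the fragments in order
    // yields the PacketHeader (frame header) followed by the H.264 payload.
}
```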

demo/windows_sender/ScreenCapture.cpp (new file)

#include "ScreenCapture.h"
#include <iostream>

ScreenCapture::ScreenCapture() = default;

ScreenCapture::~ScreenCapture() {
    if (frame_acquired_) {
        ReleaseFrame();
    }
}

bool ScreenCapture::Initialize() {
    HRESULT hr = S_OK;

    // Create D3D11 device and context
    D3D_FEATURE_LEVEL featureLevels[] = {
        D3D_FEATURE_LEVEL_11_1,
        D3D_FEATURE_LEVEL_11_0,
    };
    D3D_FEATURE_LEVEL featureLevel;

    hr = D3D11CreateDevice(
        nullptr,
        D3D_DRIVER_TYPE_HARDWARE,
        nullptr,
        D3D11_CREATE_DEVICE_BGRA_SUPPORT, // needed for GDI/D2D interop; harmless here
        featureLevels,
        ARRAYSIZE(featureLevels),
        D3D11_SDK_VERSION,
        &device_,
        &featureLevel,
        &context_
    );

    if (FAILED(hr)) {
        std::cerr << "Failed to create D3D11 device: " << std::hex << hr << std::endl;
        return false;
    }

    // Get DXGI device
    ComPtr<IDXGIDevice> dxgiDevice;
    hr = device_.As(&dxgiDevice);
    if (FAILED(hr)) return false;

    // Get DXGI adapter
    ComPtr<IDXGIAdapter> dxgiAdapter;
    hr = dxgiDevice->GetAdapter(&dxgiAdapter);
    if (FAILED(hr)) return false;

    // Get DXGI output (monitor 0)
    ComPtr<IDXGIOutput> dxgiOutput;
    hr = dxgiAdapter->EnumOutputs(0, &dxgiOutput);
    if (FAILED(hr)) {
        std::cerr << "Failed to get DXGI output (is a monitor connected?)" << std::endl;
        return false;
    }

    // QI for IDXGIOutput1, which exposes Desktop Duplication
    ComPtr<IDXGIOutput1> dxgiOutput1;
    hr = dxgiOutput.As(&dxgiOutput1);
    if (FAILED(hr)) return false;

    // Create the desktop duplication
    hr = dxgiOutput1->DuplicateOutput(device_.Get(), &duplication_);
    if (FAILED(hr)) {
        std::cerr << "Failed to duplicate output. Error: " << std::hex << hr << std::endl;
        // Common errors: E_ACCESSDENIED (output already duplicated), DXGI_ERROR_UNSUPPORTED (switchable graphics)
        return false;
    }

    return true;
}

bool ScreenCapture::CaptureFrame(ComPtr<ID3D11Texture2D>& texture) {
    if (frame_acquired_) {
        ReleaseFrame();
    }

    DXGI_OUTDUPL_FRAME_INFO frameInfo;
    ComPtr<IDXGIResource> resource;

    // Timeout 100ms
    HRESULT hr = duplication_->AcquireNextFrame(100, &frameInfo, &resource);

    if (hr == DXGI_ERROR_WAIT_TIMEOUT) {
        return false; // No new frame
    }

    if (FAILED(hr)) {
        // Possibly device lost or resolution changed
        std::cerr << "AcquireNextFrame failed: " << std::hex << hr << std::endl;
        return false;
    }

    frame_acquired_ = true;

    // AcquireNextFrame also succeeds for cursor-only updates. When
    // LastPresentTime is zero there is no new desktop image, so skip it;
    // a real sender might resend the previous frame instead.
    if (frameInfo.LastPresentTime.QuadPart == 0) {
        return false;
    }

    if (!resource) {
        return false;
    }

    hr = resource.As(&texture);
    if (FAILED(hr)) return false;

    return true;
}

void ScreenCapture::ReleaseFrame() {
    if (frame_acquired_ && duplication_) {
        duplication_->ReleaseFrame();
        frame_acquired_ = false;
    }
}
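
One failure mode the demo does not handle: `AcquireNextFrame` returns `DXGI_ERROR_ACCESS_LOST` when the desktop switches (resolution change, UAC prompt, fullscreen transition), after which the duplication object is permanently invalid. A minimal recovery sketch, assuming the `DuplicateOutput` setup above were refactored so it can be re-run:

```cpp
// Sketch only, not in this commit: recover from ACCESS_LOST by
// recreating the duplication instead of failing every frame.
HRESULT hr = duplication_->AcquireNextFrame(100, &frameInfo, &resource);
if (hr == DXGI_ERROR_ACCESS_LOST) {
    duplication_.Reset();  // the old duplication is dead
    // ... re-run the Initialize() steps from EnumOutputs/DuplicateOutput ...
    return false;          // skip this frame; the caller simply retries
}
```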

demo/windows_sender/ScreenCapture.h (new file)

#pragma once

#include <d3d11.h>
#include <dxgi1_2.h>
#include <wrl/client.h>
#include <memory>

using Microsoft::WRL::ComPtr;

class ScreenCapture {
public:
    ScreenCapture();
    ~ScreenCapture();

    bool Initialize();
    bool CaptureFrame(ComPtr<ID3D11Texture2D>& texture);
    void ReleaseFrame();

    ID3D11Device* GetDevice() const { return device_.Get(); }
    ID3D11DeviceContext* GetContext() const { return context_.Get(); }

private:
    ComPtr<ID3D11Device> device_;
    ComPtr<ID3D11DeviceContext> context_;
    ComPtr<IDXGIOutputDuplication> duplication_;
    bool frame_acquired_ = false;
};

demo/windows_sender/VideoEncoder.cpp (new file)

#include "VideoEncoder.h"
#include <iostream>

VideoEncoder::VideoEncoder() = default;

VideoEncoder::~VideoEncoder() {
    if (swsContext_) sws_freeContext(swsContext_);
    if (codecContext_) avcodec_free_context(&codecContext_);
    if (frame_) av_frame_free(&frame_);
    if (packet_) av_packet_free(&packet_);
    if (stagingTexture_) stagingTexture_.Reset();
    if (context_) context_->Release(); // GetImmediateContext added a reference
}

bool VideoEncoder::Initialize(ID3D11Device* device, int width, int height, int fps, int bitrate) {
    device_ = device;
    device_->GetImmediateContext(&context_);
    width_ = width;
    height_ = height;

    // 1. Create a staging texture for CPU access
    D3D11_TEXTURE2D_DESC desc = {};
    desc.Width = width;
    desc.Height = height;
    desc.MipLevels = 1;
    desc.ArraySize = 1;
    desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
    desc.SampleDesc.Count = 1;
    desc.Usage = D3D11_USAGE_STAGING;
    desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;

    HRESULT hr = device_->CreateTexture2D(&desc, nullptr, &stagingTexture_);
    if (FAILED(hr)) {
        std::cerr << "Failed to create staging texture" << std::endl;
        return false;
    }

    // 2. Initialize FFmpeg
    const AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_H264);
    if (!codec) {
        std::cerr << "Codec H.264 not found" << std::endl;
        return false;
    }

    codecContext_ = avcodec_alloc_context3(codec);
    if (!codecContext_) {
        std::cerr << "Could not allocate video codec context" << std::endl;
        return false;
    }

    codecContext_->bit_rate = bitrate;
    codecContext_->width = width;
    codecContext_->height = height;
    codecContext_->time_base = {1, fps};
    codecContext_->framerate = {fps, 1};
    codecContext_->gop_size = 10;
    codecContext_->max_b_frames = 0; // B-frames add latency; the zerolatency tune disables them anyway
    codecContext_->pix_fmt = AV_PIX_FMT_YUV420P;

    // H.264-specific options: trade quality for low latency
    av_opt_set(codecContext_->priv_data, "preset", "ultrafast", 0);
    av_opt_set(codecContext_->priv_data, "tune", "zerolatency", 0);

    if (avcodec_open2(codecContext_, codec, nullptr) < 0) {
        std::cerr << "Could not open codec" << std::endl;
        return false;
    }

    frame_ = av_frame_alloc();
    if (!frame_) {
        std::cerr << "Could not allocate video frame" << std::endl;
        return false;
    }
    frame_->format = codecContext_->pix_fmt;
    frame_->width = codecContext_->width;
    frame_->height = codecContext_->height;

    if (av_frame_get_buffer(frame_, 32) < 0) {
        std::cerr << "Could not allocate the video frame data" << std::endl;
        return false;
    }

    packet_ = av_packet_alloc();
    if (!packet_) {
        std::cerr << "Could not allocate packet" << std::endl;
        return false;
    }

    return true;
}

bool VideoEncoder::EncodeFrame(ID3D11Texture2D* texture, std::vector<uint8_t>& outputData, bool& isKeyFrame) {
    if (!texture || !stagingTexture_ || !context_) return false;

    // 1. Copy the GPU texture to the staging texture
    context_->CopyResource(stagingTexture_.Get(), texture);

    // 2. Map the staging texture to read its pixels
    D3D11_MAPPED_SUBRESOURCE mapped = {};
    HRESULT hr = context_->Map(stagingTexture_.Get(), 0, D3D11_MAP_READ, 0, &mapped);
    if (FAILED(hr)) return false;

    // 3. Convert BGRA to YUV420P
    if (!swsContext_) {
        swsContext_ = sws_getContext(
            width_, height_, AV_PIX_FMT_BGRA,
            width_, height_, AV_PIX_FMT_YUV420P,
            SWS_BILINEAR, nullptr, nullptr, nullptr
        );
    }

    uint8_t* srcSlice[] = { (uint8_t*)mapped.pData };
    int srcStride[] = { (int)mapped.RowPitch };

    // mapped.RowPitch may include row padding beyond width*4; passing it as
    // the source stride lets sws_scale account for the padding.
    av_frame_make_writable(frame_); // the encoder may still reference the buffer
    sws_scale(swsContext_, srcSlice, srcStride, 0, height_, frame_->data, frame_->linesize);

    context_->Unmap(stagingTexture_.Get(), 0);

    // 4. Encode
    frame_->pts = pts_++;

    int ret = avcodec_send_frame(codecContext_, frame_);
    if (ret < 0) {
        std::cerr << "Error sending a frame for encoding" << std::endl;
        return false;
    }

    while (ret >= 0) {
        ret = avcodec_receive_packet(codecContext_, packet_);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            break;
        else if (ret < 0) {
            std::cerr << "Error during encoding" << std::endl;
            return false;
        }

        outputData.insert(outputData.end(), packet_->data, packet_->data + packet_->size);
        if (packet_->flags & AV_PKT_FLAG_KEY) isKeyFrame = true;

        av_packet_unref(packet_);
    }

    return true;
}
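
`EncodeFrame` never drains the encoder, so packets still buffered inside libavcodec are lost at shutdown. The standard fix is the libavcodec drain protocol (send a null frame, then receive until `AVERROR_EOF`); a minimal sketch, assuming a hypothetical `Flush()` method added to `VideoEncoder`:

```cpp
// Hypothetical VideoEncoder::Flush() — not part of this commit.
// Drains packets buffered inside the encoder at shutdown.
bool VideoEncoder::Flush(std::vector<uint8_t>& outputData) {
    avcodec_send_frame(codecContext_, nullptr); // enter draining mode
    int ret = 0;
    while (ret >= 0) {
        ret = avcodec_receive_packet(codecContext_, packet_);
        if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN)) break;
        if (ret < 0) return false;
        outputData.insert(outputData.end(), packet_->data, packet_->data + packet_->size);
        av_packet_unref(packet_);
    }
    return true;
}
```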

demo/windows_sender/VideoEncoder.h (new file)

#pragma once

#include <d3d11.h>
#include <wrl/client.h>
#include <vector>
#include <cstdint>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
}

using Microsoft::WRL::ComPtr;

class VideoEncoder {
public:
    VideoEncoder();
    ~VideoEncoder();

    bool Initialize(ID3D11Device* device, int width, int height, int fps, int bitrate);
    bool EncodeFrame(ID3D11Texture2D* texture, std::vector<uint8_t>& outputData, bool& isKeyFrame);

private:
    ID3D11Device* device_ = nullptr;
    ID3D11DeviceContext* context_ = nullptr;
    ComPtr<ID3D11Texture2D> stagingTexture_;

    AVCodecContext* codecContext_ = nullptr;
    AVFrame* frame_ = nullptr;
    AVPacket* packet_ = nullptr;
    SwsContext* swsContext_ = nullptr;

    int width_ = 0;
    int height_ = 0;
    int pts_ = 0;
};

demo/windows_sender/main.cpp (new file)

#include "NetworkSender.h"
#include "ScreenCapture.h"
#include "VideoEncoder.h"
#include <iostream>
#include <thread>
#include <chrono>

int main(int argc, char* argv[]) {
    std::string ip = "127.0.0.1";
    int port = 8888;

    if (argc > 1) ip = argv[1];
    if (argc > 2) port = std::stoi(argv[2]);

    std::cout << "Starting Windows Sender Demo..." << std::endl;
    std::cout << "Target: " << ip << ":" << port << std::endl;

    ScreenCapture capture;
    if (!capture.Initialize()) {
        std::cerr << "Failed to initialize Screen Capture" << std::endl;
        return 1;
    }

    // Capture one frame to learn the screen size
    D3D11_TEXTURE2D_DESC desc;
    ComPtr<ID3D11Texture2D> frame;
    std::cout << "Waiting for first frame..." << std::endl;
    while (!capture.CaptureFrame(frame)) {
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
    frame->GetDesc(&desc);
    capture.ReleaseFrame();

    int width = desc.Width;
    int height = desc.Height;
    std::cout << "Screen Size: " << width << "x" << height << std::endl;

    VideoEncoder encoder;
    if (!encoder.Initialize(capture.GetDevice(), width, height, 60, 4000000)) { // 60 fps, 4 Mbps
        std::cerr << "Failed to initialize Video Encoder" << std::endl;
        return 1;
    }

    NetworkSender sender;
    if (!sender.Initialize(ip, port)) {
        std::cerr << "Failed to initialize Network Sender" << std::endl;
        return 1;
    }

    std::cout << "Streaming started. Press Ctrl+C to stop." << std::endl;

    int frameCount = 0;
    auto lastTime = std::chrono::high_resolution_clock::now();

    while (true) {
        ComPtr<ID3D11Texture2D> texture;
        if (capture.CaptureFrame(texture)) {
            std::vector<uint8_t> encodedData;
            bool isKeyFrame = false;

            if (encoder.EncodeFrame(texture.Get(), encodedData, isKeyFrame)) {
                if (!encodedData.empty()) {
                    // Current wall-clock timestamp in milliseconds
                    auto now = std::chrono::high_resolution_clock::now();
                    uint64_t timestamp = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();

                    sender.SendFrame(encodedData, timestamp, width, height, isKeyFrame);
                }
            }
            capture.ReleaseFrame();

            frameCount++;
        }

        // Report the achieved frame rate once per second
        auto now = std::chrono::high_resolution_clock::now();
        if (std::chrono::duration_cast<std::chrono::seconds>(now - lastTime).count() >= 1) {
            std::cout << "FPS: " << frameCount << std::endl;
            frameCount = 0;
            lastTime = now;
        }

        // No explicit pacing: AcquireNextFrame blocks (up to its 100ms timeout),
        // and Desktop Duplication is naturally limited to the screen refresh rate.
    }

    return 0;
}
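
If the loop ever needs an explicit frame-rate cap (say, streaming at 30 fps from a 144 Hz display), fixed-interval pacing is the usual approach; a sketch, not part of the demo:

```cpp
// Sketch: cap the capture/encode/send loop at a fixed rate (here 30 fps).
using Clock = std::chrono::steady_clock;
const auto interval = std::chrono::milliseconds(1000 / 30);
auto next = Clock::now();
while (true) {
    next += interval;
    // ... capture, encode, send one frame ...
    std::this_thread::sleep_until(next); // returns immediately if the frame ran long
}
```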