diff --git a/.gitignore b/.gitignore
index 38bf577..3d07b47 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,63 +1,55 @@
-# Build directories
-build/
-out/
-bin/
-lib/
-*.exe
-*.dll
+# Prerequisites
+*.d
+
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Compiled Dynamic libraries
 *.so
 *.dylib
+*.dll
+
+# Fortran module files
+*.mod
+*.smod
+
+# Compiled Static libraries
+*.lai
+*.la
 *.a
 *.lib
+# Executables
+*.exe
+*.out
+*.app
+
 # CMake
+cmake_install.cmake
 CMakeCache.txt
 CMakeFiles/
-cmake_install.cmake
 Makefile
-*.cmake
-!CMakeLists.txt
+build/
-# IDE
+# Visual Studio
 .vs/
+*.vcxproj
+*.vcxproj.filters
+*.vcxproj.user
+*.sln
+*.sdf
+*.opensdf
+*.suo
+
+# Editors
 .vscode/
 .idea/
 *.swp
-*.swo
 *~
-.DS_Store
-
-# Android
-*.apk
-*.ap_
-*.dex
-*.class
-local.properties
-.gradle/
-build/
-.idea/
-*.iml
-
-# Windows
-Thumbs.db
-ehthumbs.db
-Desktop.ini
-
-# Dependencies
-third_party/
-external/
-vcpkg_installed/
-
-# Logs
-*.log
-
-# Temporary files
-*.tmp
-*.temp
-*.bak
-*.backup
-
-# FlatBuffers generated files (optional, if you want to track them)
-# *.generated.h
-# *.generated.cpp
-
diff --git a/demo/windows_sender/CMakeLists.txt b/demo/windows_sender/CMakeLists.txt
new file mode 100644
index 0000000..c61dd98
--- /dev/null
+++ b/demo/windows_sender/CMakeLists.txt
@@ -0,0 +1,53 @@
+cmake_minimum_required(VERSION 3.10)
+project(WindowsSenderDemo)
+
+set(CMAKE_CXX_STANDARD 17)
+
+# Windows specific
+if(WIN32)
+    add_definitions(-DUNICODE -D_UNICODE -DWIN32_LEAN_AND_MEAN -DNOMINMAX)
+endif()
+
+# Source files
+set(SOURCES
+    main.cpp
+    ScreenCapture.cpp
+    ScreenCapture.h
+    VideoEncoder.cpp
+    VideoEncoder.h
+    NetworkSender.cpp
+    NetworkSender.h
+)
+
+add_executable(WindowsSenderDemo ${SOURCES})
+
+# Libraries
+# Media Foundation libraries are removed as we switch to FFmpeg:
+# target_link_libraries(WindowsSenderDemo d3d11 dxgi mf mfplat mfuuid ws2_32 mfreadwrite)
+
+# Find FFmpeg.
+# You might need to set FFMPEG_ROOT to your FFmpeg installation path,
+# e.g. cmake .. -DFFMPEG_ROOT="C:/ffmpeg"
+find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h PATHS ${FFMPEG_ROOT}/include)
+find_library(AVCODEC_LIBRARY avcodec PATHS ${FFMPEG_ROOT}/lib)
+find_library(AVFORMAT_LIBRARY avformat PATHS ${FFMPEG_ROOT}/lib)
+find_library(AVUTIL_LIBRARY avutil PATHS ${FFMPEG_ROOT}/lib)
+find_library(SWSCALE_LIBRARY swscale PATHS ${FFMPEG_ROOT}/lib)
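+# Note: these are plain find_path/find_library lookups, so only the four
+# FFmpeg libraries named above are located. FFmpeg builds that pull in
+# additional dependencies may require extra find_library calls (or
+# pkg-config / a CMake config package); adjust to your installation.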
+
+if (AVCODEC_INCLUDE_DIR AND AVCODEC_LIBRARY)
+    include_directories(${AVCODEC_INCLUDE_DIR})
+    message(STATUS "Found FFmpeg: ${AVCODEC_INCLUDE_DIR}")
+else()
+    message(FATAL_ERROR "FFmpeg not found! Please set FFMPEG_ROOT to your FFmpeg installation.")
+endif()
+
+target_link_libraries(WindowsSenderDemo
+    d3d11
+    dxgi
+    ws2_32
+    ${AVCODEC_LIBRARY}
+    ${AVFORMAT_LIBRARY}
+    ${AVUTIL_LIBRARY}
+    ${SWSCALE_LIBRARY}
+)
diff --git a/demo/windows_sender/NetworkSender.cpp b/demo/windows_sender/NetworkSender.cpp
new file mode 100644
index 0000000..19656ba
--- /dev/null
+++ b/demo/windows_sender/NetworkSender.cpp
@@ -0,0 +1,86 @@
+#include "NetworkSender.h"
+#include <algorithm> // std::min
+#include <cstring>   // memcpy
+
+struct TransportHeader {
+    uint32_t frameId;
+    uint16_t fragId;
+    uint16_t totalFrags;
+};
+
+NetworkSender::NetworkSender() {
+    WSADATA wsaData;
+    WSAStartup(MAKEWORD(2, 2), &wsaData);
+}
+
+NetworkSender::~NetworkSender() {
+    if (socket_ != INVALID_SOCKET) {
+        closesocket(socket_);
+    }
+    WSACleanup();
+}
+
+bool NetworkSender::Initialize(const std::string& ip, int port) {
+    socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+    if (socket_ == INVALID_SOCKET) return false;
+
+    // Set the send buffer size (1 MB) so large frames don't overflow the socket buffer
+    int sndBuf = 1024 * 1024;
+    setsockopt(socket_, SOL_SOCKET, SO_SNDBUF, (char*)&sndBuf, sizeof(sndBuf));
+
+    destAddr_.sin_family = AF_INET;
+    destAddr_.sin_port = htons(port);
+    inet_pton(AF_INET, ip.c_str(), &destAddr_.sin_addr);
+
+    return true;
+}
+
+bool NetworkSender::SendFrame(const std::vector<uint8_t>& data, uint64_t timestamp, int width, int height, bool isKeyFrame) {
+    // 1. Serialize frame info
+    PacketHeader header;
+    header.timestamp = timestamp;
+    header.width = width;
+    header.height = height;
+    header.frameType = isKeyFrame ? 0 : 1;
+    header.dataSize = (uint32_t)data.size();
+
+    std::vector<uint8_t> buffer;
+    buffer.resize(sizeof(PacketHeader) + data.size());
+    memcpy(buffer.data(), &header, sizeof(PacketHeader));
+    memcpy(buffer.data() + sizeof(PacketHeader), data.data(), data.size());
+
+    // 2. Fragment and send
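+    // Each UDP datagram carries an 8-byte TransportHeader plus up to
+    // MTU - 8 = 1392 payload bytes. For example, a 100 KB frame
+    // (102400 B payload + 24 B PacketHeader = 102424 B) splits into
+    // ceil(102424 / 1392) = 74 datagrams.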
+    const int MTU = 1400; // Safe MTU
+    const int HEADER_SIZE = sizeof(TransportHeader);
+    const int PAYLOAD_SIZE = MTU - HEADER_SIZE;
+
+    size_t totalSize = buffer.size();
+    size_t totalFrags = (totalSize + PAYLOAD_SIZE - 1) / PAYLOAD_SIZE;
+
+    static uint32_t frameId = 0;
+    frameId++;
+
+    for (size_t i = 0; i < totalFrags; ++i) {
+        TransportHeader transHeader;
+        transHeader.frameId = frameId;
+        transHeader.fragId = (uint16_t)i;
+        transHeader.totalFrags = (uint16_t)totalFrags;
+
+        size_t offset = i * PAYLOAD_SIZE;
+        size_t chunkSize = std::min((size_t)PAYLOAD_SIZE, totalSize - offset);
+
+        std::vector<uint8_t> packet;
+        packet.resize(HEADER_SIZE + chunkSize);
+
+        memcpy(packet.data(), &transHeader, HEADER_SIZE);
+        memcpy(packet.data() + HEADER_SIZE, buffer.data() + offset, chunkSize);
+
+        int sent = sendto(socket_, (const char*)packet.data(), (int)packet.size(), 0, (sockaddr*)&destAddr_, sizeof(destAddr_));
+        if (sent < 0) {
+            // std::cerr << "Send failed" << std::endl;
+            // Continue sending the other fragments anyway.
+        }
+    }
+
+    return true;
+}
diff --git a/demo/windows_sender/NetworkSender.h b/demo/windows_sender/NetworkSender.h
new file mode 100644
index 0000000..b83952b
--- /dev/null
+++ b/demo/windows_sender/NetworkSender.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <string>
+#include <vector>
+#include <cstdint>
+
+#pragma comment(lib, "ws2_32.lib")
+
+struct PacketHeader {
+    uint64_t timestamp;
+    uint32_t width;
+    uint32_t height;
+    uint32_t frameType; // 0 = I-frame, 1 = P-frame
+    uint32_t dataSize;
+};
+
+class NetworkSender {
+public:
+    NetworkSender();
+    ~NetworkSender();
+
+    bool Initialize(const std::string& ip, int port);
+    bool SendFrame(const std::vector<uint8_t>& data, uint64_t timestamp, int width, int height, bool isKeyFrame);
+
+private:
+    SOCKET socket_ = INVALID_SOCKET;
+    sockaddr_in destAddr_;
+};
diff --git a/demo/windows_sender/README.md b/demo/windows_sender/README.md
new file mode 100644
index 0000000..2b35cd7
--- /dev/null
+++ b/demo/windows_sender/README.md
@@ -0,0 +1,58 @@
+# Windows Sender Demo
+
+This is a simplified demo of the Windows host sender for DisplayFlow.
+It demonstrates:
+1. Screen capture using the Desktop Duplication API (DXGI).
+2. H.264 encoding using FFmpeg (libx264, software).
+3. Network transmission using UDP.
+
+## Prerequisites
+
+- Windows 10/11
+- Visual Studio 2019 or later (with C++ Desktop Development)
+- CMake 3.10+
+- FFmpeg development libraries (headers and import libraries)
+
+## Build
+
+1. Open a terminal (Developer Command Prompt for VS).
+2. Navigate to this directory:
+   ```cmd
+   cd demo\windows_sender
+   ```
+3. Create a build directory:
+   ```cmd
+   mkdir build
+   cd build
+   ```
+4. Configure and build (point `FFMPEG_ROOT` at your FFmpeg installation if it is not found automatically):
+   ```cmd
+   cmake .. -DFFMPEG_ROOT="C:/ffmpeg"
+   cmake --build . --config Release
+   ```
+
+## Run
+
+Run the executable with target IP and port (both optional):
+
+```cmd
+.\Release\WindowsSenderDemo.exe <IP> <Port>
+```
+
+Example:
+```cmd
+.\Release\WindowsSenderDemo.exe 192.168.1.100 8888
+```
+
+## Implementation Details
+
+- **ScreenCapture**: Uses `IDXGIOutputDuplication` to capture desktop frames.
+- **VideoEncoder**: Uses FFmpeg (`libavcodec` with the x264 `ultrafast` preset and `zerolatency` tune) to encode frames to H.264.
+  - *Note*: Captured BGRA textures are copied to a CPU staging texture and converted to YUV420P with `libswscale` before encoding.
+- **NetworkSender**: Fragments the H.264 stream into UDP packets (MTU ~1400 bytes) and sends them to the target.
+
+## Protocol
+
+The demo uses a simple custom protocol for feasibility verification:
+- **Transport Header** (network layer, per packet): `FrameID` (4B), `FragID` (2B), `TotalFrags` (2B).
+- **Frame Header** (application layer, at the start of the reassembled frame): `Timestamp` (8B), `Width` (4B), `Height` (4B), `Type` (4B), `Size` (4B).
+- **Payload**: H.264 NAL units.
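+
+For reference, a receiver would collect fragments by `FrameID` until all
+`TotalFrags` pieces have arrived. Below is a minimal, untested sketch of that
+loop (Winsock, blocking I/O, no loss or reordering handling); the
+`ReceiveLoop` helper and its socket setup (`WSAStartup`/`bind`) are assumed
+and are not part of this demo:
+
+```cpp
+#include <winsock2.h>
+#include <cstdint>
+#include <cstring>
+#include <map>
+#include <vector>
+
+#pragma pack(push, 1)
+struct TransportHeader { uint32_t frameId; uint16_t fragId; uint16_t totalFrags; };
+#pragma pack(pop)
+
+// Collect datagrams per frameId; hand each completed frame to a decoder.
+void ReceiveLoop(SOCKET sock) {
+    std::map<uint32_t, std::vector<std::vector<uint8_t>>> frags; // frameId -> fragments
+    std::map<uint32_t, size_t> seen;                             // frameId -> count received
+    char buf[1500];
+    for (;;) {
+        int n = recv(sock, buf, sizeof(buf), 0); // one datagram per call
+        if (n < (int)sizeof(TransportHeader)) continue;
+        TransportHeader h;
+        std::memcpy(&h, buf, sizeof(h));
+        if (h.totalFrags == 0 || h.fragId >= h.totalFrags) continue;
+        auto& parts = frags[h.frameId];
+        parts.resize(h.totalFrags);
+        if (!parts[h.fragId].empty()) continue; // duplicate fragment
+        parts[h.fragId].assign(buf + sizeof(h), buf + n);
+        if (++seen[h.frameId] == h.totalFrags) {
+            std::vector<uint8_t> frame; // 24-byte Frame Header + H.264 payload
+            for (auto& p : parts) frame.insert(frame.end(), p.begin(), p.end());
+            // ...parse the frame header, feed the payload to a decoder...
+            frags.erase(h.frameId);
+            seen.erase(h.frameId);
+        }
+    }
+}
+```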
diff --git a/demo/windows_sender/ScreenCapture.cpp b/demo/windows_sender/ScreenCapture.cpp
new file mode 100644
index 0000000..5288f20
--- /dev/null
+++ b/demo/windows_sender/ScreenCapture.cpp
@@ -0,0 +1,120 @@
+#include "ScreenCapture.h"
+#include <iostream>
+
+ScreenCapture::ScreenCapture() = default;
+
+ScreenCapture::~ScreenCapture() {
+    if (frame_acquired_) {
+        ReleaseFrame();
+    }
+}
+
+bool ScreenCapture::Initialize() {
+    HRESULT hr = S_OK;
+
+    // Create D3D11 device and context
+    D3D_FEATURE_LEVEL featureLevels[] = {
+        D3D_FEATURE_LEVEL_11_1,
+        D3D_FEATURE_LEVEL_11_0,
+    };
+    D3D_FEATURE_LEVEL featureLevel;
+
+    hr = D3D11CreateDevice(
+        nullptr,
+        D3D_DRIVER_TYPE_HARDWARE,
+        nullptr,
+        D3D11_CREATE_DEVICE_BGRA_SUPPORT, // Needed for GDI/D2D interop if used; harmless here
+        featureLevels,
+        ARRAYSIZE(featureLevels),
+        D3D11_SDK_VERSION,
+        &device_,
+        &featureLevel,
+        &context_
+    );
+
+    if (FAILED(hr)) {
+        std::cerr << "Failed to create D3D11 device: " << std::hex << hr << std::endl;
+        return false;
+    }
+
+    // Get the DXGI device
+    ComPtr<IDXGIDevice> dxgiDevice;
+    hr = device_.As(&dxgiDevice);
+    if (FAILED(hr)) return false;
+
+    // Get the DXGI adapter
+    ComPtr<IDXGIAdapter> dxgiAdapter;
+    hr = dxgiDevice->GetAdapter(&dxgiAdapter);
+    if (FAILED(hr)) return false;
+
+    // Get the DXGI output (monitor 0)
+    ComPtr<IDXGIOutput> dxgiOutput;
+    hr = dxgiAdapter->EnumOutputs(0, &dxgiOutput);
+    if (FAILED(hr)) {
+        std::cerr << "Failed to get DXGI output (is a monitor connected?)" << std::endl;
+        return false;
+    }
+
+    // QI for IDXGIOutput1, which provides Desktop Duplication
+    ComPtr<IDXGIOutput1> dxgiOutput1;
+    hr = dxgiOutput.As(&dxgiOutput1);
+    if (FAILED(hr)) return false;
+
+    // Create the desktop duplication
+    hr = dxgiOutput1->DuplicateOutput(device_.Get(), &duplication_);
+    if (FAILED(hr)) {
+        // Common errors: E_ACCESSDENIED (already duplicated), DXGI_ERROR_UNSUPPORTED (switchable graphics)
+        std::cerr << "Failed to duplicate output. Error: " << std::hex << hr << std::endl;
+        return false;
+    }
+
+    return true;
+}
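+
+// Returns true only when a fresh desktop image is available. The returned
+// texture stays valid until ReleaseFrame() is called, and the duplication
+// API requires releasing the current frame before the next acquire.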
+bool ScreenCapture::CaptureFrame(ComPtr<ID3D11Texture2D>& texture) {
+    if (frame_acquired_) {
+        ReleaseFrame();
+    }
+
+    DXGI_OUTDUPL_FRAME_INFO frameInfo;
+    ComPtr<IDXGIResource> resource;
+
+    // Timeout 100 ms
+    HRESULT hr = duplication_->AcquireNextFrame(100, &frameInfo, &resource);
+
+    if (hr == DXGI_ERROR_WAIT_TIMEOUT) {
+        return false; // No new frame
+    }
+
+    if (FAILED(hr)) {
+        // Possibly device lost or resolution changed
+        std::cerr << "AcquireNextFrame failed: " << std::hex << hr << std::endl;
+        return false;
+    }
+
+    frame_acquired_ = true;
+
+    // LastPresentTime == 0 means AcquireNextFrame returned for a cursor or
+    // metadata-only update with no new desktop image, so skip it. (A real
+    // stream might resend the previous frame here instead.)
+    if (frameInfo.LastPresentTime.QuadPart == 0) {
+        return false;
+    }
+
+    if (!resource) {
+        return false;
+    }
+
+    hr = resource.As(&texture);
+    if (FAILED(hr)) return false;
+
+    return true;
+}
+
+void ScreenCapture::ReleaseFrame() {
+    if (frame_acquired_ && duplication_) {
+        duplication_->ReleaseFrame();
+        frame_acquired_ = false;
+    }
+}
diff --git a/demo/windows_sender/ScreenCapture.h b/demo/windows_sender/ScreenCapture.h
new file mode 100644
index 0000000..a52a45f
--- /dev/null
+++ b/demo/windows_sender/ScreenCapture.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <windows.h>
+#include <d3d11.h>
+#include <dxgi1_2.h>
+#include <wrl/client.h>
+
+using Microsoft::WRL::ComPtr;
+
+class ScreenCapture {
+public:
+    ScreenCapture();
+    ~ScreenCapture();
+
+    bool Initialize();
+    bool CaptureFrame(ComPtr<ID3D11Texture2D>& texture);
+    void ReleaseFrame();
+
+    ID3D11Device* GetDevice() const { return device_.Get(); }
+    ID3D11DeviceContext* GetContext() const { return context_.Get(); }
+
+private:
+    ComPtr<ID3D11Device> device_;
+    ComPtr<ID3D11DeviceContext> context_;
+    ComPtr<IDXGIOutputDuplication> duplication_;
+    bool frame_acquired_ = false;
+};
diff --git a/demo/windows_sender/VideoEncoder.cpp b/demo/windows_sender/VideoEncoder.cpp
new file mode 100644
index 0000000..bc4a622
--- /dev/null
+++ b/demo/windows_sender/VideoEncoder.cpp
@@ -0,0 +1,146 @@
+#include "VideoEncoder.h"
+#include <iostream>
+
+VideoEncoder::VideoEncoder() = default;
+
+VideoEncoder::~VideoEncoder() {
+    if (swsContext_) sws_freeContext(swsContext_);
+    if (codecContext_) avcodec_free_context(&codecContext_);
+    if (frame_) av_frame_free(&frame_);
+    if (packet_) av_packet_free(&packet_);
+    if (context_) context_->Release(); // GetImmediateContext AddRef'd this
+    if (stagingTexture_) stagingTexture_.Reset();
+}
+
+bool VideoEncoder::Initialize(ID3D11Device* device, int width, int height, int fps, int bitrate) {
+    device_ = device;
+    device_->GetImmediateContext(&context_);
+    width_ = width;
+    height_ = height;
+
+    // 1. Create a staging texture for CPU read-back
+    D3D11_TEXTURE2D_DESC desc = {};
+    desc.Width = width;
+    desc.Height = height;
+    desc.MipLevels = 1;
+    desc.ArraySize = 1;
+    desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
+    desc.SampleDesc.Count = 1;
+    desc.Usage = D3D11_USAGE_STAGING;
+    desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+
+    HRESULT hr = device_->CreateTexture2D(&desc, nullptr, &stagingTexture_);
+    if (FAILED(hr)) {
+        std::cerr << "Failed to create staging texture" << std::endl;
+        return false;
+    }
+
+    // 2. Initialize FFmpeg
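+    // avcodec_find_encoder(AV_CODEC_ID_H264) returns FFmpeg's default H.264
+    // encoder, normally libx264 when FFmpeg is built with it. A hardware
+    // encoder (e.g. h264_nvenc or h264_qsv) would instead be selected with
+    // avcodec_find_encoder_by_name().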
+    const AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_H264);
+    if (!codec) {
+        std::cerr << "Codec H.264 not found" << std::endl;
+        return false;
+    }
+
+    codecContext_ = avcodec_alloc_context3(codec);
+    if (!codecContext_) {
+        std::cerr << "Could not allocate video codec context" << std::endl;
+        return false;
+    }
+
+    codecContext_->bit_rate = bitrate;
+    codecContext_->width = width;
+    codecContext_->height = height;
+    codecContext_->time_base = {1, fps};
+    codecContext_->framerate = {fps, 1};
+    codecContext_->gop_size = 10;
+    codecContext_->max_b_frames = 1; // Note: the zerolatency tune below disables B-frames anyway
+    codecContext_->pix_fmt = AV_PIX_FMT_YUV420P;
+
+    // H.264-specific options: trade quality for minimal latency
+    av_opt_set(codecContext_->priv_data, "preset", "ultrafast", 0);
+    av_opt_set(codecContext_->priv_data, "tune", "zerolatency", 0);
+
+    if (avcodec_open2(codecContext_, codec, nullptr) < 0) {
+        std::cerr << "Could not open codec" << std::endl;
+        return false;
+    }
+
+    frame_ = av_frame_alloc();
+    if (!frame_) {
+        std::cerr << "Could not allocate video frame" << std::endl;
+        return false;
+    }
+    frame_->format = codecContext_->pix_fmt;
+    frame_->width = codecContext_->width;
+    frame_->height = codecContext_->height;
+
+    if (av_frame_get_buffer(frame_, 32) < 0) {
+        std::cerr << "Could not allocate the video frame data" << std::endl;
+        return false;
+    }
+
+    packet_ = av_packet_alloc();
+    if (!packet_) {
+        std::cerr << "Could not allocate packet" << std::endl;
+        return false;
+    }
+
+    return true;
+}
+
+bool VideoEncoder::EncodeFrame(ID3D11Texture2D* texture, std::vector<uint8_t>& outputData, bool& isKeyFrame) {
+    if (!texture || !stagingTexture_ || !context_) return false;
+
+    // 1. Copy the GPU texture to the staging texture
+    context_->CopyResource(stagingTexture_.Get(), texture);
+
+    // 2. Map the staging texture to read the pixels
+    D3D11_MAPPED_SUBRESOURCE mapped = {};
+    HRESULT hr = context_->Map(stagingTexture_.Get(), 0, D3D11_MAP_READ, 0, &mapped);
+    if (FAILED(hr)) return false;
+
+    // 3. Convert BGRA to YUV420P
+    if (!swsContext_) {
+        swsContext_ = sws_getContext(
+            width_, height_, AV_PIX_FMT_BGRA,
+            width_, height_, AV_PIX_FMT_YUV420P,
+            SWS_BILINEAR, nullptr, nullptr, nullptr
+        );
+        if (!swsContext_) {
+            context_->Unmap(stagingTexture_.Get(), 0);
+            return false;
+        }
+    }
+
+    uint8_t* srcSlice[] = { (uint8_t*)mapped.pData };
+    int srcStride[] = { (int)mapped.RowPitch };
+
+    // mapped.RowPitch may be larger than width*4 (row padding); passing it as
+    // the stride lets sws_scale handle the padding correctly.
+    // frame_ may still be referenced by the encoder from the previous send,
+    // so make it writable before scaling into it.
+    if (av_frame_make_writable(frame_) < 0) {
+        context_->Unmap(stagingTexture_.Get(), 0);
+        return false;
+    }
+    sws_scale(swsContext_, srcSlice, srcStride, 0, height_, frame_->data, frame_->linesize);
+
+    context_->Unmap(stagingTexture_.Get(), 0);
+
+    // 4. Encode
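+    // avcodec_send_frame()/avcodec_receive_packet() form FFmpeg's
+    // send/receive encode API: one input frame may yield zero or more output
+    // packets, so the receive side is drained in a loop below. With the
+    // zerolatency tune, x264 normally emits one packet per frame.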
+    frame_->pts = pts_++;
+
+    int ret = avcodec_send_frame(codecContext_, frame_);
+    if (ret < 0) {
+        std::cerr << "Error sending a frame for encoding" << std::endl;
+        return false;
+    }
+
+    while (ret >= 0) {
+        ret = avcodec_receive_packet(codecContext_, packet_);
+        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
+            break;
+        else if (ret < 0) {
+            std::cerr << "Error during encoding" << std::endl;
+            return false;
+        }
+
+        outputData.insert(outputData.end(), packet_->data, packet_->data + packet_->size);
+        if (packet_->flags & AV_PKT_FLAG_KEY) isKeyFrame = true;
+
+        av_packet_unref(packet_);
+    }
+
+    return true;
+}
diff --git a/demo/windows_sender/VideoEncoder.h b/demo/windows_sender/VideoEncoder.h
new file mode 100644
index 0000000..a063e03
--- /dev/null
+++ b/demo/windows_sender/VideoEncoder.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <d3d11.h>
+#include <wrl/client.h>
+#include <vector>
+#include <cstdint>
+
+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libswscale/swscale.h>
+#include <libavutil/opt.h>
+}
+
+using Microsoft::WRL::ComPtr;
+
+class VideoEncoder {
+public:
+    VideoEncoder();
+    ~VideoEncoder();
+
+    bool Initialize(ID3D11Device* device, int width, int height, int fps, int bitrate);
+    bool EncodeFrame(ID3D11Texture2D* texture, std::vector<uint8_t>& outputData, bool& isKeyFrame);
+
+private:
+    ID3D11Device* device_ = nullptr;
+    ID3D11DeviceContext* context_ = nullptr;
+    ComPtr<ID3D11Texture2D> stagingTexture_;
+
+    AVCodecContext* codecContext_ = nullptr;
+    AVFrame* frame_ = nullptr;
+    AVPacket* packet_ = nullptr;
+    SwsContext* swsContext_ = nullptr;
+
+    int width_ = 0;
+    int height_ = 0;
+    int pts_ = 0;
+};
diff --git a/demo/windows_sender/main.cpp b/demo/windows_sender/main.cpp
new file mode 100644
index 0000000..1ada164
--- /dev/null
+++ b/demo/windows_sender/main.cpp
@@ -0,0 +1,90 @@
+#include "NetworkSender.h"
+#include "ScreenCapture.h"
+#include "VideoEncoder.h"
+#include <iostream>
+#include <chrono>
+#include <thread>
+
+int main(int argc, char* argv[]) {
+    std::string ip = "127.0.0.1";
+    int port = 8888;
+
+    if (argc > 1) ip = argv[1];
+    if (argc > 2) port = std::stoi(argv[2]);
+
+    std::cout << "Starting Windows Sender Demo..." << std::endl;
+    std::cout << "Target: " << ip << ":" << port << std::endl;
+
+    ScreenCapture capture;
+    if (!capture.Initialize()) {
+        std::cerr << "Failed to initialize Screen Capture" << std::endl;
+        return 1;
+    }
+
+    // Capture one frame to learn the screen size
+    D3D11_TEXTURE2D_DESC desc;
+    ComPtr<ID3D11Texture2D> frame;
+    std::cout << "Waiting for first frame..." << std::endl;
+    while (!capture.CaptureFrame(frame)) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    }
+    frame->GetDesc(&desc);
+    capture.ReleaseFrame();
+
+    int width = desc.Width;
+    int height = desc.Height;
+    std::cout << "Screen Size: " << width << "x" << height << std::endl;
+
+    VideoEncoder encoder;
+    if (!encoder.Initialize(capture.GetDevice(), width, height, 60, 4000000)) { // 60 fps, 4 Mbps
+        std::cerr << "Failed to initialize Video Encoder" << std::endl;
+        return 1;
+    }
+
+    NetworkSender sender;
+    if (!sender.Initialize(ip, port)) {
+        std::cerr << "Failed to initialize Network Sender" << std::endl;
+        return 1;
+    }
+
+    std::cout << "Streaming started. Press Ctrl+C to stop." << std::endl;
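+
+    // Main loop: capture -> encode -> fragment & send. CaptureFrame() can
+    // return false (timeout or cursor-only update); execution still falls
+    // through to the once-per-second FPS printout below.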
+    int frameCount = 0;
+    auto lastTime = std::chrono::high_resolution_clock::now();
+
+    while (true) {
+        ComPtr<ID3D11Texture2D> texture;
+        if (capture.CaptureFrame(texture)) {
+            std::vector<uint8_t> encodedData;
+            bool isKeyFrame = false;
+
+            if (encoder.EncodeFrame(texture.Get(), encodedData, isKeyFrame)) {
+                if (!encodedData.empty()) {
+                    // Current timestamp in milliseconds
+                    auto now = std::chrono::high_resolution_clock::now();
+                    uint64_t timestamp = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
+
+                    sender.SendFrame(encodedData, timestamp, width, height, isKeyFrame);
+                    // std::cout << "Sent frame: " << encodedData.size() << " bytes, Key: " << isKeyFrame << std::endl;
+                }
+            }
+            capture.ReleaseFrame();
+
+            frameCount++;
+        }
+
+        auto now = std::chrono::high_resolution_clock::now();
+        if (std::chrono::duration_cast<std::chrono::seconds>(now - lastTime).count() >= 1) {
+            std::cout << "FPS: " << frameCount << std::endl;
+            frameCount = 0;
+            lastTime = now;
+        }
+
+        // No explicit sleep: CaptureFrame blocks (100 ms timeout) and Desktop
+        // Duplication usually paces delivery at the screen refresh rate. If
+        // frames arrive faster than needed, a frame-rate cap could go here.
+    }
+
+    return 0;
+}