在 Linux 系统中,当你尝试使用 socket() 函数创建原始套接字(raw socket)时,默认情况下会失败,错误号为 EPERM(errno=1,Operation not permitted,即权限不足;下文表格和示例中以"EACCES"标注的就是这一错误)。这是因为 Linux 限制普通用户创建该类型套接字,只有具有超级用户权限(root 权限)或 CAP_NET_RAW 能力的进程才可以创建。

一、解决方案

解决该问题有如下三种方案:

1.1 方案一:直接以超级用户身份运行程序创建套接字

sudo ./your_program

1.2 方案二:调整程序权限

如果你不想以 root 用户身份运行整个程序,可以考虑将创建套接字的部分代码放在一个单独的可执行文件中,并通过 sudo 运行该部分代码。或者,你可以使用 setuid 来提升特定部分的权限,但这通常不推荐,因为它可能带来安全风险。

1.3 方案三:赋予程序 CAP_NET_RAW 能力来允许程序创建原始套接字

sudo setcap cap_net_raw+ep /path/to/your_program

前两个受安全性等限制不推荐。下面以模拟一个ping工具对方案三进行说明。

二、测试程序

包含三个测试程序:

说明:libfunc.so 依赖子库 libfunc\_sub.so,且 libfunc.so 的 ELF 中没有指向 libfunc\_sub.so 所在路径的 RUNPATH 信息。
  1. 不依赖任何库的ping
  2. 依赖libfunc.so的ping(ping-lib)。
  3. 依赖libfunc.so的ping(ping-lib-rpath,在ping中增加了libfunc.so所在路径的RUNPATH信息)

2.1 源文件main.cpp:

// 模拟ping程序发送ICMP数据包
// 仅ping主要功能,错误处理、回复验证等细节未做处理
#include <stdlib.h>
#include <stdio.h>
#include <memory.h>
#include <string.h>
#include <time.h>

#ifdef _WIN32
#  include <afxsock.h>
#else
#  include <ctype.h>
#  include <cerrno>
#  include <unistd.h>
#  include <fcntl.h>
#  include <signal.h>
#  include <sys/shm.h>
#  include <sys/socket.h>
#  include <netinet/in.h>
#  include <arpa/inet.h>
#  define SOCKET int
#  define SOCKET_ERROR -1
#endif

#ifdef LINK_LIB    // simulate making the program depend on another library
#include "lib.h"
// The initializer calls getversion() so that the executable references a
// symbol from the linked library. getversion() is presumably declared in
// lib.h (not visible here); `v` is not read anywhere else in this file.
static char* v = getversion();
#endif

// Short aliases for the unsigned types used in the packet layouts below.
typedef unsigned short u_short;
typedef unsigned char u_char;

// Number of payload bytes carried in ECHOREQUEST::cData.
#define REQ_DATASIZE    8           // Echo Request Data size
// Values for the ICMPHDR Type field. ping() sends ICMP_ECHOREQ;
// ICMP_ECHOREPLY is defined but not referenced in the visible code.
#define ICMP_ECHOREPLY  0
#define ICMP_ECHOREQ    8

// ICMP header as this program lays it out; embedded at the start of
// ECHOREQUEST and therefore transmitted verbatim by ping().
// NOTE(review): the trailing Data member (plus any compiler padding) becomes
// part of the bytes sent on the wire and shifts the fields that follow it in
// ECHOREQUEST — confirm this layout is intended before changing it.
typedef struct tagICMPHDR
{
    u_char  Type;                   // Type
    u_char  Code;                   // Code
    u_short Checksum;               // Checksum
    u_short ID;                     // Identification
    u_short Seq;                    // Sequence
    char    Data;                   // Data
}ICMPHDR, * PICMPHDR;

// The complete message handed to sendto() by ping(): the ICMP header, a
// timestamp, and REQ_DATASIZE payload bytes. ping() later compares the
// payload bytes of a received packet against the ones it sent.
typedef struct tagECHOREQUEST
{
    ICMPHDR icmpHdr;
    int     dwTime;                 // set from time(NULL) before each send
    char    cData[REQ_DATASIZE];    // payload; ping() fills it with 0..REQ_DATASIZE-1
}ECHOREQUEST, * PECHOREQUEST;

// IP header layout placed at the front of ECHOREPLY, i.e. overlaid on the
// first bytes of whatever recvfrom() delivers on the raw socket.
// None of these fields are read individually in the visible code; the struct
// only positions the echoed request that follows it.
// NOTE(review): the layout has no room for IP options — presumably replies
// are expected to carry a plain header; confirm.
typedef struct tagIPHDR
{
    u_char  VIHL;                   // Version and IHL
    u_char  TOS;                    // Type Of Service
    short   TotLen;                 // Total Length
    short   ID;                     // Identification
    short   FlagOff;                // Flags and Fragment Offset
    u_char  TTL;                    // Time To Live
    u_char  Protocol;               // Protocol
    u_short Checksum;               // Checksum
    struct  in_addr iaSrc;          // Internet Address - Source
    struct  in_addr iaDst;          // Internet Address - Destination
}IPHDR, * PIPHDR;

// ICMP Echo Reply
// Receive buffer used by ping(): the IP header, then the echoed request,
// then spare room. recvfrom() is given sizeof(ECHOREPLY) as the buffer size.
typedef struct tagECHOREPLY
{
    IPHDR   ipHdr;
    ECHOREQUEST     echoRequest;
    char    cFiller[256];           // extra space; presumably for longer packets — not read in the visible code
}ECHOREPLY, * PECHOREPLY;


/*
 * Compute the 16-bit one's-complement checksum of a buffer.
 *
 * addr: start of the data; it is summed as consecutive native-endian
 *       16-bit words.
 * len:  number of bytes to cover. An odd trailing byte is summed as if it
 *       were followed by a zero byte. A length <= 0 yields 0xFFFF.
 *
 * Returns the bitwise complement of the folded sum. Storing that value in
 * the (previously zeroed) checksum field makes the checksum of the whole
 * buffer come out as 0.
 *
 * The accumulator is unsigned and is folded while summing, so arbitrarily
 * long buffers cannot overflow it, and the words are read with memcpy so
 * the caller may pass any object cast to unsigned short*.
 */
unsigned short  ping_in_cksum(unsigned short* addr, int len)
{
    const unsigned char* bytes = (const unsigned char*)addr;
    unsigned int sum = 0;
    int nleft = len;

    /* Add up the 16-bit words. */
    while (nleft > 1) {
        unsigned short word;

        memcpy(&word, bytes, sizeof(word));
        sum += word;
        /* Fold early once the top bit is set so the next add cannot wrap. */
        if (sum & 0x80000000u) {
            sum = (sum & 0xffffu) + (sum >> 16);
        }
        bytes += 2;
        nleft -= 2;
    }

    /* Mop up an odd byte: it occupies the first byte of a zeroed word. */
    if (nleft == 1) {
        unsigned short last = 0;

        memcpy(&last, bytes, 1);
        sum += last;
    }

    /* Add the carries from the top 16 bits back into the low 16 bits. */
    while (sum >> 16) {
        sum = (sum & 0xffffu) + (sum >> 16);
    }

    return (unsigned short)~sum;            /* truncate to 16 bits */
}

/*
 * Fetch the most recent system error code for the calling thread:
 * GetLastError() on Windows builds, errno everywhere else.
 */
int get_last_error()
{
    int code;

#ifdef _WIN32
    code = GetLastError();
#else
    code = errno;
#endif

    return code;
}

SOCKET sock;

int CloseSocket(void)
{
    if (sock < 0)
        return 0;
#ifdef _WIN32
    closesocket(sock);
#else
    close(sock);
#endif
    sock = -1;
    return 1;
}

/*
 * (Re)create the shared raw ICMP socket.
 *
 * Any socket that is currently open is closed first. On failure a message
 * with the returned handle and the system error code is written to stderr.
 *
 * Returns 1 on success, 0 when socket() fails (for example when the process
 * is not allowed to open raw sockets).
 */
int CreateSocket()
{
#ifdef _WIN32
    AfxSocketInit();
#endif
    CloseSocket();
    sock = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);

#ifdef _WIN32
    /* SOCKET is unsigned on Windows; "sock < 0" would never be true. */
    const int failed = (sock == INVALID_SOCKET);
#else
    const int failed = (sock < 0);
#endif
    if (failed) {
        /* Read the error code before any other call can overwrite it. */
        const int err = get_last_error();
        fprintf(stderr, "\tsocket() error, ret=%d, errno=%d\n", (int)sock, err);
        return 0;
    }
    return 1;
}

int ping(char* addr, int timeout_secs)
{
    int ret, i;

    if (!CreateSocket()) {
        return -1;
    }

    struct sockaddr_in connaddr;
    memset((char*)&connaddr, 0, sizeof(connaddr));
    connaddr.sin_family = AF_INET;
    connaddr.sin_port = 0;
    connaddr.sin_addr.s_addr = inet_addr(addr);

#ifdef _WIN32
    unsigned long block_flag = TRUE;
    ioctlsocket(sock, FIONBIO, &block_flag);
#else
    sigset(SIGPIPE, SIG_IGN);
    fcntl(sock, F_SETFL, O_NDELAY);
#endif

    ECHOREQUEST echoReq;
    ECHOREPLY echoReply;

    fd_set  readfd;
    struct sockaddr rxaddr;
#ifdef _WIN32
    int  a_size = sizeof(rxaddr);
#else
    socklen_t a_size = sizeof(rxaddr);
#endif
    struct timeval tmptv;
    tmptv.tv_sec = 1;
    tmptv.tv_usec = 0;

    echoReq.icmpHdr.Type = ICMP_ECHOREQ;
    echoReq.icmpHdr.Code = 0;
    echoReq.icmpHdr.Checksum = 0;
    echoReq.icmpHdr.ID = 66;
    for (i = 0; i < REQ_DATASIZE; i++) {
        echoReq.cData[i] = i;
        echoReply.echoRequest.cData[i] = 0;
    }
    for (int ln = 0; ln < timeout_secs; ln++) {
        echoReq.icmpHdr.Seq = ln;
        echoReq.dwTime = (int)time(NULL);
        // Put data in packet and compute checksum
        echoReq.icmpHdr.Checksum = ping_in_cksum((u_short*)&echoReq, sizeof(ECHOREQUEST));
        // Send the echo request
        ret = sendto(sock, (char*)&echoReq, sizeof(ECHOREQUEST), 0, (struct sockaddr*)&connaddr, sizeof(struct sockaddr_in));
        if (ret == SOCKET_ERROR) continue;
        if (ret <= 0) {
            CloseSocket();
            return 0;
        }
        FD_ZERO(&readfd);
        FD_SET(sock, &readfd);
        ret = select(FD_SETSIZE, &readfd, 0, 0, &tmptv);
        if (ret > 0) {
            ret = recvfrom(sock, (char*)&echoReply, sizeof(echoReply), 0, &rxaddr, &a_size);
            if (ret > 32) {
                if (memcmp(echoReq.cData, echoReply.echoRequest.cData, REQ_DATASIZE) == 0) {
                    CloseSocket();
                    return 1;
                }
            }
        }
    }

    CloseSocket();
    return 0;
}

/*
 * Entry point: ping the address given as the first command-line argument
 * (127.0.0.1 when none is given) ten times, one second apart, and print
 * "ret=ok" or "ret=not" after each attempt.
 */
int main(int argc, char* argv[])
{
    const int rounds = 10;
    const int wait_secs = 3;
    char target[64] = { '\0' };

    /* Keep at most 63 characters so the buffer always stays NUL-terminated. */
    const char* requested = (argc > 1) ? argv[1] : "127.0.0.1";
    strncpy(target, requested, sizeof(target) - 1);

    int round = 0;
    while (round < rounds) {
        const char* verdict = (ping(target, wait_secs) == 1) ? "ok" : "not";
        printf("ret=%s\n", verdict);

#ifdef _WIN32
        Sleep(1000);
#else
        sleep(1);
#endif
        ++round;
    }

    return 0;
}

2.2 Makefile:

# clean and cap do not produce files of the same name.
.PHONY: clean cap

# Plain build: no extra library dependencies.
ping: main.cpp
	g++ $^ -o $@ -Wno-deprecated-declarations

# ping-lib depends on the shared library /home/test/func/lib/libfunc.so;
# used to test how dependent libraries are loaded after setcap.
ping-lib: main.cpp
	g++ $^ -o $@ -DLINK_LIB -I/home/test/func/include -L/home/test/func/lib -lfunc \
	-Wno-deprecated-declarations

# Same as ping-lib, but the library directory is also recorded in the
# executable's run-time search path. The rpath must name the directory that
# actually contains libfunc.so, i.e. the one passed to -L.
ping-lib-rpath: main.cpp
	g++ $^ -o $@ -DLINK_LIB -I/home/test/func/include -L/home/test/func/lib -lfunc \
	-Wno-deprecated-declarations \
	-Wl,-rpath,/home/test/func/lib

clean:
	rm -f ping ping-*

# setcap expects "capabilities file" pairs, so apply the capability to one
# binary per invocation instead of passing all three files at once.
cap: ping ping-lib ping-lib-rpath
	for f in $^; do sudo setcap cap_net_raw+eip "$$f" || exit 1; done

2.3 可执行文件依赖

程序的依赖情况如下,第3个程序与第2个依赖相同

test@Debian:~/tmp/ping/ping$ ldd ping
        linux-vdso.so.1 (0x00007ffff7b6e000)
        libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f6936b10000)
        /lib64/ld-linux-x86-64.so.2 (0x00007f6936d15000)
test@Debian:~/tmp/ping/ping$ ldd ping-lib
        linux-vdso.so.1 (0x00007ffd5c382000)
        libfunc.so => /home/test/lib/libfunc.so (0x00007fd647303000)
        libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007fd647105000)
        libfunc_sub.so => /home/test/lib/libfunc_sub.so (0x00007fd6470fd000)
        libstdc++.so.6 => /lib/x86_64-linux-gnu/libstdc++.so.6 (0x00007fd646e00000)
        libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007fd64701e000)
        libgcc_s.so.1 => /lib/x86_64-linux-gnu/libgcc_s.so.1 (0x00007fd646de0000)
        /lib64/ld-linux-x86-64.so.2 (0x00007fd647354000)

三、测试结果

| 可执行文件 | setcap前 | setcap后 | 说明 |
| --- | --- | --- | --- |
| ping | EACCES | OK | 赋予能力后可正常运行 |
| ping-lib | EACCES | ENONELIB(func) | 赋予能力后运行无法找到依赖库 |
| ping-lib-rpath | EACCES | ENONELIB(func\_sub) | 赋予能力后运行无法找到间接依赖库(libfunc.so无RPATH信息导致间接依赖找不到) |

报错输出示例:

  1. OK
test@Debian:~/tmp/ping/ping$ ./ping-lib-rpath
ret=ok
  1. EACCES
test@Debian:~/tmp/ping/ping$ ./ping
socket() error, ret=-1, errno=1
ret=not
  1. ENONELIB
test@Debian:~/tmp/ping/ping$ ./ping-lib
./ping-lib: error while loading shared libraries: libfunc.so: cannot open shared object file: No such file or directory
test@Debian:~/tmp/ping/ping$ ./ping-lib-rpath
./ping-lib-rpath: error while loading shared libraries: libfunc_sub.so: cannot open shared object file: No such file or directory

四、依赖库配置

setcap 给一个二进制文件赋予能力(capability)后,内核会以更「受限」的方式执行它,影响依赖库加载过程。

4.1 LD\_LIBRARY\_PATH无效

根据 ld.so(8) - Linux manual page,运行在安全执行模式(secure-execution mode)下的程序会忽略一些环境变量,如 LD\_LIBRARY\_PATH。赋予程序能力后,程序即会在该模式下运行,这会使通过 LD\_LIBRARY\_PATH 指定可执行文件依赖库路径的方法失效。

Secure-execution mode

For security reasons, if the dynamic linker determines that a
binary should be run in secure-execution mode, the effects of some
environment variables are voided or modified, and furthermore
those environment variables are stripped from the environment, so
that the program does not even see the definitions. Some of these
environment variables affect the operation of the dynamic linker
itself, and are described below. Other environment variables
treated in this way include: GCONV_PATH, GETCONF_DIR, HOSTALIASES,
LOCALDOMAIN, LD_AUDIT, LD_DEBUG, LD_DEBUG_OUTPUT, LD_DYNAMIC_WEAK,
LD_HWCAP_MASK, LD_LIBRARY_PATH, LD_ORIGIN_PATH, LD_PRELOAD,
LD_PROFILE, LD_SHOW_AUXV, LOCALDOMAIN, LOCPATH, MALLOC_TRACE,
NIS_PATH, NLSPATH, RESOLV_HOST_CONF, RES_OPTIONS, TMPDIR, and
TZDIR.

A binary is executed in secure-execution mode if the AT_SECURE
entry in the auxiliary vector (see getauxval(3)) has a nonzero
value. This entry may have a nonzero value for various reasons,
including:

• The process's real and effective user IDs differ, or the real
and effective group IDs differ. This typically occurs as a
result of executing a set-user-ID or set-group-ID program.

• **A process with a non-root user ID executed a binary that
conferred capabilities to the process**.

• A nonzero value may have been set by a Linux Security Module.

4.2 把依赖库放到系统默认路径

echo "/your/custom/lib" | sudo tee /etc/ld.so.conf.d/customlib.conf
sudo ldconfig

这样做后,系统会将该目录加入全局库搜索路径,但会影响所有用户,不推荐多用户系统环境使用。

4.3 使用 rpath 或 runpath(推荐)

使用链接参数把库路径写进二进制文件,例如使用 gcc 编译时加上:

-Wl,-rpath,/your/custom/lib/path

或者手动修改已有程序的 rpath/runpath:

chrpath -r /your/custom/lib/path your_binary

这样运行时不依赖 LD_LIBRARY_PATH,解决兼容性问题。

但需要注意,所依赖的库自身也需要正确设置 rpath 信息,否则会导致间接依赖库找不到的问题,正如 ping-lib-rpath 示例所示。

rpath使用注意

根据 glibc(debian12-2.36)源码中对 $ORIGIN 变量的处理,运行 SUID/SGID 程序时,只有展开后的路径属于信任目录才会生效。信任目录包括系统默认的库搜索路径以及 glibc 编译时指定的目录,故使用 $ORIGIN 表示的自定义相对路径通常情况下会被忽略。