6

一文了解Linux Kernel中密码学算法的设计与应用-51CTO.COM

 2 years ago
source link: https://os.51cto.com/article/708361.html
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
neoserver,ios ssh client

1.密码学基础知识

基本概念,如下请自行学习和理解:

  • 非对称密码

2.Kernel密码学算法的软件框架和接口模型

Linux Kernel系统中实现了很多算法,这些算法被统一归纳为:对称密码算法、数字摘要算法、随机数算法、认证加密算法、非对称密码算法等,并在Kernel层提供了统一操作的接口,供kernel其他模块调用。部分算法又被封装到了网络层,开放暴露给Userspace。其具体的结构/接口模型如下所示:

15d02f0542f025cfa0d48996e57578390f3ff6.png

2.1Userspace对底层密码算法的访问

Userspace通过socket接口方式(AF_ALG,即PF_ALG)调用到底层算法的实现

f9fa946886b4992d825712213ac196eb09a9e1.png

在Userspace,需指定socket接口 PF_ALG,需指定算法类型(如skcipher)、需指定具体调用的"算法实现"(如aes-cbc),这样命令传输到Kernel层,就能根据这些信息跳转到相应的算法实现层。注意akcipher算法没有暴露给网络层,也就没有开放给Userspace了,所以在User程序中,是无法调用Kernel层的非对称密码算法的。

如下是一个Userspace程序调用kernel底层算法的示例:

(1)建立一个socket会话的流程:

socket(AF_ALG,...)
bind()
setsockopt
accept
sendmsg
recvmsg

(2)相关代码

/*
 * Open an AF_ALG socket and bind it to the algorithm selected by the
 * (type, name) pair.
 *
 * type: copied into sockaddr_alg.salg_type
 * name: copied into sockaddr_alg.salg_name
 *
 * Returns the bound socket descriptor on success. On failure the error is
 * logged, any opened descriptor is closed, and -1 is returned.
 */
static int linux_af_alg_socket(const char *type, const char *name)
{
  struct sockaddr_alg addr;
  int fd = socket(AF_ALG, SOCK_SEQPACKET, 0);

  if (fd < 0) {
    LogErr("%s: Failed to open AF_ALG socket: %s\n",
         __func__, strerror(errno));
    return -1;
  }

  /* Start from an all-zero address, then fill in family, type and name. */
  os_memset(&addr, 0, sizeof(addr));
  addr.salg_family = AF_ALG;
  os_strlcpy((char *) addr.salg_type, type, sizeof(addr.salg_type));
  os_strlcpy((char *) addr.salg_name, name, sizeof(addr.salg_name));

  if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) >= 0)
    return fd;

  /* Log before close() so errno still describes the bind failure. */
  LogErr("%s: Failed to bind AF_ALG socket(%s,%s): %s\n",__func__, (char *) addr.salg_type, (char *) addr.salg_name, strerror(errno));
  close(fd);
  return -1;
}

/*
 * Create a keyed AF_ALG skcipher context.
 *
 * alg:     algorithm name handed to linux_af_alg_socket() together with
 *          TYPE_NAME
 * key:     key bytes installed with setsockopt(ALG_SET_KEY)
 * key_len: number of key bytes
 *
 * On success the returned context holds the bound socket in ->s and the
 * descriptor obtained from accept() in ->t. On any failure the partially
 * built context is passed to linux_af_alg_skcipher_deinit() and NULL is
 * returned.
 */
static struct linux_af_alg_skcipher *linux_af_alg_skcipher(const char *alg, const u8 *key, size_t key_len)
{
  struct linux_af_alg_skcipher *ctx = os_zalloc(sizeof(*ctx));

  /* Note: on allocation failure the cleanup call below receives NULL. */
  if (ctx == NULL)
    goto fail;

  /* Mark the operation descriptor as "not opened yet". */
  ctx->t = -1;

  ctx->s = linux_af_alg_socket(TYPE_NAME, alg);
  if (ctx->s < 0)
    goto fail;

  /* The key is set on the bound socket before accept() is called. */
  if (setsockopt(ctx->s, SOL_ALG, ALG_SET_KEY, key, key_len) < 0) {
    LogErr("%s: setsockopt(ALG_SET_KEY) failed: %s\n",
         __func__, strerror(errno));
    goto fail;
  }

  ctx->t = accept(ctx->s, NULL, NULL);
  if (ctx->t >= 0)
    return ctx;

  LogErr("%s: accept on AF_ALG socket failed: %s\n",
       __func__, strerror(errno));

fail:
  linux_af_alg_skcipher_deinit(ctx);
  return NULL;
}

/*
 * Run one skcipher operation on `data` in place through an AF_ALG socket.
 *
 * alg_name: algorithm name passed to linux_af_alg_skcipher(),
 *           e.g. "__cbc-aes-asr-ce"
 * key:      key bytes
 * key_len:  number of key bytes
 * enc:      non-zero selects ALG_OP_ENCRYPT, zero selects ALG_OP_DECRYPT
 * iv:       AES_BLOCK_SIZE bytes of IV, or NULL to send an IV record with
 *           ivlen == 0
 * data:     input buffer; the result is received back into the same buffer
 * data_len: number of bytes in `data`
 *
 * Returns 0 on success and -1 on any failure (the failure is logged). The
 * skcipher context is released on every path.
 */
static int aes_128_cbc_oper(char *alg_name, const u8 *key,size_t key_len, int enc, const u8 *iv, u8 *data, size_t data_len)
{
  struct linux_af_alg_skcipher *skcipher;
  /*
   * Ancillary-data buffer. It is wrapped in a union with struct cmsghdr so
   * that it is suitably aligned for the CMSG_* accessors (see cmsg(3)).
   */
  union {
    char buf[100];
    struct cmsghdr align;
  } ctrl;
  struct iovec io[1];
  struct msghdr msg;
  struct cmsghdr *hdr;
  ssize_t ret;
  u32 *op;
  struct af_alg_iv *alg_iv;
  size_t iv_len = AES_BLOCK_SIZE;
  int rc = -1;

  skcipher = linux_af_alg_skcipher(alg_name, key, key_len);
  if (!skcipher)
    return -1;

  /* The same iovec is used for sending and receiving: in-place operation. */
  io[0].iov_base = (void *) data;
  io[0].iov_len = data_len;
  os_memset(&msg, 0, sizeof(msg));
  /* Zero the control buffer before walking it with CMSG_NXTHDR(). */
  os_memset(&ctrl, 0, sizeof(ctrl));
  msg.msg_control = ctrl.buf;
  msg.msg_controllen = CMSG_SPACE(sizeof(u32)) +
    CMSG_SPACE(sizeof(*alg_iv) + iv_len);
  msg.msg_iov = io;
  msg.msg_iovlen = 1;

  /* First control record: operation (encrypt or decrypt). */
  hdr = CMSG_FIRSTHDR(&msg);
  hdr->cmsg_level = SOL_ALG;
  hdr->cmsg_type = ALG_SET_OP;
  hdr->cmsg_len = CMSG_LEN(sizeof(u32));
  op = (u32 *) CMSG_DATA(hdr);
  *op = enc ? ALG_OP_ENCRYPT : ALG_OP_DECRYPT;

  /* Second control record: IV. */
  hdr = CMSG_NXTHDR(&msg, hdr);
  hdr->cmsg_level = SOL_ALG;
  hdr->cmsg_type = ALG_SET_IV;
  /* cmsg_len takes CMSG_LEN(); CMSG_SPACE() is only for sizing the buffer. */
  hdr->cmsg_len = CMSG_LEN(sizeof(*alg_iv) + iv_len);
  alg_iv = (struct af_alg_iv *) CMSG_DATA(hdr);
  if (iv != NULL) {
    alg_iv->ivlen = iv_len;
    os_memcpy(alg_iv->iv, iv, iv_len);
  } else {
    alg_iv->ivlen = 0;
  }

  ret = sendmsg(skcipher->t, &msg, 0);
  if (ret < 0) {
    LogErr("%s: sendmsg failed: %s\n",
         __func__, strerror(errno));
    goto out;
  }

  ret = recvmsg(skcipher->t, &msg, 0);
  if (ret < 0) {
    LogErr("%s: recvmsg failed: %s\n",
         __func__, strerror(errno));
    goto out;
  }
  if ((size_t) ret < data_len) {
    LogErr(
         "%s: recvmsg not return full data (%d/%d)\n",
         __func__, (int) ret, (int) data_len);
    goto out;
  }

  rc = 0;
out:
  /* Single exit point: the context is released on success and on failure. */
  linux_af_alg_skcipher_deinit(skcipher);
  return rc;
}

2.2Kernelspace对底层密码算法的访问

Kernel程序对底层算法的调用采用函数直接调用的方式。流程为:kernel程序--->算法中间层--->算法实现层. 算法中间层 就是暴露给kernel其它模块的API函数。

如下是一个kernel中调用底层算法的示例(以skcipher为例):

static int test_skcipher(void)
{
        struct crypto_skcipher *tfm = NULL;
        struct skcipher_request *req = NULL;
        u8 *data = NULL;
        const size_t datasize = 512; /* data size in bytes */
        struct scatterlist sg;
        DECLARE_CRYPTO_WAIT(wait);
        u8 iv[16];  /* AES-256-XTS takes a 16-byte IV */
        u8 key[64]; /* AES-256-XTS takes a 64-byte key */
        int err;

        /*
         * Allocate a tfm (a transformation object) and set the key.
         *
         * In real-world use, a tfm and key are typically used for many
         * encryption/decryption operations.  But in this example, we'll just do a
         * single encryption operation with it (which is not very efficient).
         */

        tfm = crypto_alloc_skcipher("xts(aes)", 0, 0);
        if (IS_ERR(tfm)) {
                pr_err("Error allocating xts(aes) handle: %ld\n", PTR_ERR(tfm));
                return PTR_ERR(tfm);
        }

        get_random_bytes(key, sizeof(key));
        err = crypto_skcipher_setkey(tfm, key, sizeof(key));
        if (err) {
                pr_err("Error setting key: %d\n", err);
                goto out;
        }

        /* Allocate a request object */
        req = skcipher_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                err = -ENOMEM;
                goto out;
        }

        /* Prepare the input data */
        data = kmalloc(datasize, GFP_KERNEL);
        if (!data) {
                err = -ENOMEM;
                goto out;
        }
        get_random_bytes(data, datasize);

        /* Initialize the IV */
        get_random_bytes(iv, sizeof(iv));

        /*
         * Encrypt the data in-place.
         *
         * For simplicity, in this example we wait for the request to complete
         * before proceeding, even if the underlying implementation is asynchronous.
         *
         * To decrypt instead of encrypt, just change crypto_skcipher_encrypt() to
         * crypto_skcipher_decrypt().
         */
        sg_init_one(&sg, data, datasize);
        skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
                                           CRYPTO_TFM_REQ_MAY_SLEEP,
                                      crypto_req_done, &wait);
        skcipher_request_set_crypt(req, &sg, &sg, datasize, iv);
        err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
        if (err) {
                pr_err("Error encrypting data: %d\n", err);
                goto out;
        }

        pr_debug("Encryption was successful\n");
out:
        crypto_free_skcipher(tfm);
        skcipher_request_free(req);
        kfree(data);
        return err;
}

2.3增加一个算法实现

增加一个"算法的实现" 只需要:

  • 定义一个该算法的结构体变量并初始化,其实就是实现其中的成员函数
  • 将该算法实现注册到系统中。

结构体的定义并初始化:

/*
 * skcipher implementations provided by this driver: one ECB and one CBC
 * AES entry. Both share priority, block size, key-size limits and walk
 * size; the CBC entry additionally declares an IV size and uses its own
 * context type and setkey routine.
 */
static struct skcipher_alg aes_algs[] = {
  /* ECB mode */
  {
    .base = {
      .cra_name        = "__ecb(aes)",
      .cra_driver_name = "__ecb-aes-neonbs",
      .cra_priority    = 250,
      .cra_blocksize   = AES_BLOCK_SIZE,
      .cra_ctxsize     = sizeof(struct aesbs_ctx),
      .cra_module      = THIS_MODULE,
      .cra_flags       = CRYPTO_ALG_INTERNAL,
    },
    .min_keysize = AES_MIN_KEY_SIZE,
    .max_keysize = AES_MAX_KEY_SIZE,
    .walksize    = 8 * AES_BLOCK_SIZE,
    .setkey      = aesbs_setkey,
    .encrypt     = ecb_encrypt,
    .decrypt     = ecb_decrypt,
  },
  /* CBC mode */
  {
    .base = {
      .cra_name        = "__cbc(aes)",
      .cra_driver_name = "__cbc-aes-neonbs",
      .cra_priority    = 250,
      .cra_blocksize   = AES_BLOCK_SIZE,
      .cra_ctxsize     = sizeof(struct aesbs_cbc_ctx),
      .cra_module      = THIS_MODULE,
      .cra_flags       = CRYPTO_ALG_INTERNAL,
    },
    .min_keysize = AES_MIN_KEY_SIZE,
    .max_keysize = AES_MAX_KEY_SIZE,
    .walksize    = 8 * AES_BLOCK_SIZE,
    .ivsize      = AES_BLOCK_SIZE,
    .setkey      = aesbs_cbc_setkey,
    .encrypt     = cbc_encrypt,
    .decrypt     = cbc_decrypt,
  },
};

成员函数的实现,例如:

/*
 * .encrypt callback of the ECB entry: forwards the request to
 * __ecb_crypt() together with the aesbs_ecb_encrypt routine and returns
 * its result unchanged.
 */
static int ecb_encrypt(struct skcipher_request *req)
{
  const int status = __ecb_crypt(req, aesbs_ecb_encrypt);

  return status;
}

将该算法实现注册到系统中:

/*
 * Module init function (excerpt: the "..." lines stand for code the article
 * omits). It registers the aes_algs array through
 * crypto_register_skciphers() and is installed with module_init().
 */
static int __init aes_init(void)
{
...
  /* Register all ARRAY_SIZE(aes_algs) entries of aes_algs in one call. */
  err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
...
}
module_init(aes_init);

小小总结一下, 如果您要增加一个算法实现,那么您就需要定义并实现如下结构体之一,并调用 crypto_register_xxx()注册到kernel系统中:

  • skcipher_alg
  • akcipher_alg
  • ahash_alg
  • rng_alg
  • aead_alg

3.kernel中实现的算法实现

  • 对称密码底层是怎样实现的?纯软?硬件?Neon指令?CE指令?
  • 非对称密码底层是怎样实现的?
  • Hash、rng、aead 又都是怎样实现的?

实现算法的方式:

(1)在armv8/armv9的芯片中,有ARM-CE指令可以进行aes/hash/md5计算,

(2)在armv8/armv9的芯片中,ARM-NEON指令也可以进行aes/hash/md5计算

(3)arm的security IP中,有cryptocell之类的加密芯片

(4)另外SOC厂商也可能集成自己设计的crypto engine加解密芯片

(5)除此之外,还有C语言、汇编程序等编程语言实现的纯软实现

毫无疑问,在效率这块肯定是:(3)(4) > (1) > (2) > (5). 另外从"实现算法的方式" 来看,如果是rng、aead、rsa之类的算法,那么就不能用ARM-CE这种方式,只有编程语言实现、Neon指令实现、crypto engine(含arm security IP)这几种方式了。

kernel怎么玩的?:

针对 crypto engine(含arm security IP) 这种,先当SOC硬件不支持,跳过此场景。

针对rng、aead、rsa,那么kernel有一套纯软的实现 (似乎没有看到arm neon指令的实现)

针对aes、hash,有arm-ce的实现、arm neon指令的实现、纯软的实现,三者三选一(通过宏开关,只能选1)

crypto engine的实现:如果自定义了crypto engine的实现,那么要看你具体的设计,是设计成“取代原有算法实现”,还是设计成“新增算法实现”。如果是前者,那么对于aes/hash,则变成了四选一的了(crypto engine实现、arm-ce的实现、arm neon指令的实现、纯软)。如果是后者,这和原有实现不冲突。

有关aes/hash底层实现三选一的开关:

(1) 开启下面两个宏,使用ARM Neon指令的实现 CONFIG_CRYPTO_AES_ARM64_CE_BLK CONFIG_CRYPTO_AES_ARM64_NEON_BLK

(2) 在(1) 的基础之上,再开启如下宏,使用ARM CE指令的实现 USE_V8_CRYPTO_EXTENSIONS

(3) 以上三个宏都不开启的情况下,使用默认的纯软实现

4.crypto engine的实现

(以ARM Security IP的cryptocell 712为例)

835c68940d887a1c568657add40334e5e2aea3.png

在Linux Kernel中开启 CONFIG_CRYPTO_DEV_CCREE宏控即可启用该实现, 代码路径如下:

f35c442908a023826e0549cae9be2737ee740a.png

以aes-cbc为例,其实现的名字和Kernel中默认算法实现的名字是一致的,也就是说,这种实现方式是取代原有的算法实现

/*
 * One template entry for the ccree driver (excerpt; the enclosing array
 * declaration is not shown). It describes AES in CBC mode: .name is the
 * algorithm name, .driver_name is this implementation's own name, and
 * .template_skcipher supplies the setkey/encrypt/decrypt callbacks together
 * with the key-size limits and IV size.
 */
{
  .name = "cbc(aes)",
  .driver_name = "cbc-aes-ccree",
  .blocksize = AES_BLOCK_SIZE,
  .template_skcipher = {
    .setkey = cc_cipher_setkey,
    .encrypt = cc_cipher_encrypt,
    .decrypt = cc_cipher_decrypt,
    .min_keysize = AES_MIN_KEY_SIZE,
    .max_keysize = AES_MAX_KEY_SIZE,
    .ivsize = AES_BLOCK_SIZE,
  },
  /* Driver-specific settings; NOTE(review): meanings are defined by the ccree driver, not visible here. */
  .cipher_mode = DRV_CIPHER_CBC,
  .flow_mode = S_DIN_to_AES,
  .min_hw_rev = CC_HW_REV_630,
  .std_body = CC_STD_NIST,
}

5.代码导读

在网络层、算法中间层、算法实现层有着丰富的结构体类型。那么怎么去阅读代码?怎样弄清各个层面之间的逻辑呢?事实上我们只要理清这些结构体之间的关系,将其抽象成模型,就会变得更加容易理解了。

如下是以Userspace调用底层的对称密码函数为例总结的一张数据结构图:

6254afc204cc32c4bce862deaab736c13ac145.png

sock通信进入网络层后(algif_skcipher.c),构建skcipher_request结构体,通过该结构体,就能寻址到底层的算法实现,继而完成算法实现的调用。这些总结一下就是:

  • skcipher_request //网络层构建的结构体
  • crypto_skcipher // kernel中间层构建的结构体,如果是kernel层调用底层算法,那么就从构建crypto_skcipher结构体开始。
  • skcipher_alg //算法实现层的结构体,描述着具体的算法实现,由实现厂商自己添加。

上述复杂的结构体流程,进一步抽象,就变成如下这个样子:

03d2dc4596ade81009a556782ffc21989c8caf.png

既然如此,那么我们还可以举一反三一下:

687db343417680e6df6661690c659c23bf0f8e.png

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK