GNU系统下内存对齐分配的记录
  qUJOhJyC3pLU 2023年11月02日 40 0


遇到一个内存对齐分配的问题,记录如下:

在 GNU 系统中,malloc 或 realloc 返回的内存块地址都是 8 的倍数(如果是 64 位系统,则为 16 的倍数)。如果你需要更大的粒度,请使用 memalign、valloc 或者 posix_memalign。其中 posix_memalign 和 valloc 在头文件 “stdlib.h” 中声明,memalign 在 “malloc.h” 中声明。

在 GNU 库中,可以使用函数 free 释放 memalign 和 valloc ,posix_memalign返回的内存块。但无法在 BSD 系统中使用,而且 BSD 系统中并未提供释放这样的内存块的途径。

函数:void * memalign (size_t boundary, size_t size) 函数 memalign 将分配一个由 size 指定大小,地址是 boundary 的倍数的内存块。参数 boundary 必须是 2 的幂!函数 memalign 可以分配较大的内存块,并且可以为返回的地址指定粒度。

函数:void * valloc (size_t size) 使用函数 valloc 与使用函数 memalign 类似,函数 valloc 的内部实现里,使用页的大小作为对齐长度,使用 memalign 来分配内存。它的实现如下所示:

函数:int posix_memalign (void **memptr, size_t alignment, size_t size);和memalign的主要差别在于函数原型,内部实现流程上,基本一致。

我们可以看一下在musl库中这三个函数的实现:

GNU系统下内存对齐分配的记录_c++

GNU系统下内存对齐分配的记录_gnu_02

GNU系统下内存对齐分配的记录_#define_03

使用时的注意事项

1、alignment必须是2的整数次幂(posix_memalign还要求它是sizeof(void *)的整数倍);如果要申请按PAGE对齐的内存,则需要通过C库的getpagesize函数获取PAGE大小。

GNU系统下内存对齐分配的记录_gnu_04

而PAGE SIZE的获取更有意思,它最终是由应用加载的时候,从内核中传递出来的:

GNU系统下内存对齐分配的记录_c语言_05

GNU系统下内存对齐分配的记录_#include_06

GNU系统下内存对齐分配的记录_c语言_07

验证:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <limits.h>

#define DALLOC_SIZE (1 * 1024 * 1024)

/*
 * Alignment demo: once per second, allocate DALLOC_SIZE bytes with malloc,
 * memalign, posix_memalign and valloc, print the address each allocator
 * returned, and release the buffer again.
 *
 * The loop never ends on its own; main returns -1 only when one of the
 * allocations fails.  argc and argv are unused.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    /* The page size is the alignment requested from memalign/posix_memalign. */
    const int pagesize = getpagesize();

    printf("%s line %d, pagesize %d.\n", __func__, __LINE__, pagesize);

    for (;;) {
        printf("==================================================================\n");

        void *p = malloc(DALLOC_SIZE);
        if (!p) {
            printf("%s line %d, malloc failure.\n", __func__, __LINE__);
            return -1;
        }
        /* Print before free(): the value of a freed pointer must not be read. */
        printf("%s line %d malloc success, p=%p.\n", __func__, __LINE__, p);
        free(p);

        void *p1 = memalign(pagesize, DALLOC_SIZE);
        if (!p1) {
            printf("%s line %d, malloc failure.\n", __func__, __LINE__);
            return -1;
        }
        printf("%s line %d malloc success, p1=%p.\n", __func__, __LINE__, p1);
        free(p1);

        /* posix_memalign reports failure through its return code, not through p2. */
        void *p2 = NULL;
        int ret = posix_memalign(&p2, pagesize, DALLOC_SIZE);
        if (ret != 0) {
            printf("%s line %d, malloc failure.\n", __func__, __LINE__);
            return -1;
        }
        printf("%s line %d malloc success, p2=%p.\n", __func__, __LINE__, p2);
        free(p2);

        void *p3 = valloc(DALLOC_SIZE);
        if (!p3) {
            printf("%s line %d, malloc failure.\n", __func__, __LINE__);
            return -1;
        }
        printf("%s line %d malloc success, p3=%p.\n", __func__, __LINE__, p3);
        free(p3);

        printf("==================================================================\n");

        sleep(1);
    }
}

验证:

czl@czl-VirtualBox:~/align$ ./a.out 
main line 16, pagesize 4096.
==================================================================
main line 29 malloc success, p=0x7f4f2360b010.
main line 39 malloc success, p1=0x7f4f2360b000.
main line 49 malloc success, p2=0x7f4f2360b000.
main line 59 malloc success, p3=0x7f4f2360b000.
==================================================================
==================================================================
main line 29 malloc success, p=0x5584f9205670.
main line 39 malloc success, p1=0x5584f9206000.
main line 49 malloc success, p2=0x5584f9206000.
main line 59 malloc success, p3=0x5584f9206000.
==================================================================
==================================================================
main line 29 malloc success, p=0x5584f9205670.
main line 39 malloc success, p1=0x5584f9206000.
main line 49 malloc success, p2=0x5584f9206000.
main line 59 malloc success, p3=0x5584f9206000.
==================================================================
==================================================================
main line 29 malloc success, p=0x5584f9205670.
main line 39 malloc success, p1=0x5584f9206000.
main line 49 malloc success, p2=0x5584f9206000.
main line 59 malloc success, p3=0x5584f9206000.
==================================================================
==================================================================
main line 29 malloc success, p=0x5584f9205670.
main line 39 malloc success, p1=0x5584f9206000.
main line 49 malloc success, p2=0x5584f9206000.
main line 59 malloc success, p3=0x5584f9206000.
==================================================================
==================================================================
main line 29 malloc success, p=0x5584f9205670.
main line 39 malloc success, p1=0x5584f9206000.
main line 49 malloc success, p2=0x5584f9206000.
main line 59 malloc success, p3=0x5584f9206000.
==================================================================
==================================================================
main line 29 malloc success, p=0x5584f9205670.
main line 39 malloc success, p1=0x5584f9206000.
main line 49 malloc success, p2=0x5584f9206000.
main line 59 malloc success, p3=0x5584f9206000.
==================================================================
==================================================================
main line 29 malloc success, p=0x5584f9205670.
main line 39 malloc success, p1=0x5584f9206000.
main line 49 malloc success, p2=0x5584f9206000.
main line 59 malloc success, p3=0x5584f9206000.
==================================================================

GNU系统下内存对齐分配的记录_gnu_08

2. 和O_DIRECT的关系:

修改代码,增加O_DIRECT模式写的操作,分别作用于四种内存分配上面:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <limits.h>
#include <string.h>

#define __USE_GNU 1
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <errno.h>

#define DALLOC_SIZE (1 * 1024 * 1024)

/*
 * Write DALLOC_SIZE bytes from p to ./new.bin through an O_DIRECT descriptor,
 * then close the descriptor and delete the file.
 *
 * p: source buffer.  It is handed to write() unchanged, so the alignment of
 *    its address is what this test exercises.
 *
 * Failures are only reported on stdout; nothing is returned to the caller.
 */
static void write_odirect_test(unsigned char *p)
{
    int fdno = open("./new.bin", O_DIRECT|O_RDWR|O_CREAT, 0666);
    if (fdno < 0) {
        printf("%s line %d, open file failure.\n", __func__, __LINE__);
        return;
    }

    /*
     * Mode 1 is FALLOC_FL_KEEP_SIZE on Linux: reserve 100 MiB of blocks
     * without changing the file size.  Best effort, the result is ignored.
     */
    fallocate(fdno, 1, 0, 100*1024*1024);

    if (write(fdno, p, DALLOC_SIZE) != DALLOC_SIZE) {
        /* Save errno first so the calls made while reporting cannot clobber it. */
        int err = errno;

        printf("%s line %d, write failure, err %s.\n", __func__, __LINE__, strerror(err));
    } else {
        fsync(fdno);
    }

    /*
     * Close and unlink on every path, including a failed write, so repeated
     * calls neither leak descriptors nor leave the test file behind.
     */
    close(fdno);

    if (unlink("./new.bin") < 0) {
        printf("%s line %d unlink errpr!\n", __func__, __LINE__);
    }
}

/*
 * O_DIRECT alignment demo: once per second, allocate DALLOC_SIZE bytes with
 * malloc, memalign, posix_memalign and valloc, pass each buffer to
 * write_odirect_test(), print the buffer address, and release it.
 *
 * The loop never ends on its own; main returns -1 only when one of the
 * allocations fails.  argc and argv are unused.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    /* The page size is the alignment requested from memalign/posix_memalign. */
    const int pagesize = getpagesize();

    printf("%s line %d, pagesize %d.\n", __func__, __LINE__, pagesize);

    for (;;) {
        printf("==================================================================\n");

        void *p = malloc(DALLOC_SIZE);
        if (!p) {
            printf("%s line %d, malloc failure.\n", __func__, __LINE__);
            return -1;
        }
        write_odirect_test(p);
        /* Print before free(): the value of a freed pointer must not be read. */
        printf("%s line %d malloc success, p=%p.\n", __func__, __LINE__, p);
        free(p);

        void *p1 = memalign(pagesize, DALLOC_SIZE);
        if (!p1) {
            printf("%s line %d, malloc failure.\n", __func__, __LINE__);
            return -1;
        }
        write_odirect_test(p1);
        printf("%s line %d malloc success, p1=%p.\n", __func__, __LINE__, p1);
        free(p1);

        /* posix_memalign reports failure through its return code, not through p2. */
        void *p2 = NULL;
        int ret = posix_memalign(&p2, pagesize, DALLOC_SIZE);
        if (ret != 0) {
            printf("%s line %d, malloc failure.\n", __func__, __LINE__);
            return -1;
        }
        write_odirect_test(p2);
        printf("%s line %d malloc success, p2=%p.\n", __func__, __LINE__, p2);
        free(p2);

        void *p3 = valloc(DALLOC_SIZE);
        if (!p3) {
            printf("%s line %d, malloc failure.\n", __func__, __LINE__);
            return -1;
        }
        write_odirect_test(p3);
        printf("%s line %d malloc success, p3=%p.\n", __func__, __LINE__, p3);
        free(p3);

        printf("==================================================================\n");

        sleep(1);
    }
}

在UBUNTU上测试,发现打印如下,很明显非对齐(malloc返回的地址)的写失败,其它按页对齐的写都成功。

czl@czl-VirtualBox:~/align$ ./a.out 
main line 52, pagesize 4096.
==================================================================
write_odirect_test line 31, write failure, err Invalid argument.
main line 68 malloc success, p=0x7f435acfd010.
main line 81 malloc success, p1=0x7f435acfd000.
main line 94 malloc success, p2=0x7f435acfd000.
main line 107 malloc success, p3=0x7f435acfd000.
==================================================================
==================================================================
write_odirect_test line 31, write failure, err Invalid argument.
main line 68 malloc success, p=0x56318ac6b670.
main line 81 malloc success, p1=0x56318ac6c000.
main line 94 malloc success, p2=0x56318ac6c000.
main line 107 malloc success, p3=0x56318ac6c000.
==================================================================
==================================================================
write_odirect_test line 31, write failure, err Invalid argument.
main line 68 malloc success, p=0x56318ac6b670.
main line 81 malloc success, p1=0x56318ac6c000.
main line 94 malloc success, p2=0x56318ac6c000.
main line 107 malloc success, p3=0x56318ac6c000.
==================================================================
==================================================================
write_odirect_test line 31, write failure, err Invalid argument.
main line 68 malloc success, p=0x56318ac6b670.
main line 81 malloc success, p1=0x56318ac6c000.
main line 94 malloc success, p2=0x56318ac6c000.
main line 107 malloc success, p3=0x56318ac6c000.
==================================================================
==================================================================
write_odirect_test line 31, write failure, err Invalid argument.
main line 68 malloc success, p=0x56318ac6b670.
main line 81 malloc success, p1=0x56318ac6c000.
main line 94 malloc success, p2=0x56318ac6c000.
main line 107 malloc success, p3=0x56318ac6c000.
==================================================================

进一步用strace追踪,发现这个错误是从系统调用返回的:

write(1, "================================"..., 67==================================================================
) = 67
openat(AT_FDCWD, "./new.bin", O_RDWR|O_CREAT|O_DIRECT, 0666) = 7
write(7, "\240\214c<\353\177\0\0\240\214c<\353\177\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = -1 EINVAL (Invalid argument)
write(1, "write_odirect_test line 31, writ"..., 65write_odirect_test line 31, write failure, err Invalid argument.
) = 65
write(1, "main line 68 malloc success, p=0"..., 47main line 68 malloc success, p=0x563b08a0e670.
) = 47
openat(AT_FDCWD, "./new.bin", O_RDWR|O_CREAT|O_DIRECT, 0666) = 8
write(8, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
write(1, "main line 81 malloc success, p1="..., 48main line 81 malloc success, p1=0x563b08a0f000.
) = 48
openat(AT_FDCWD, "./new.bin", O_RDWR|O_CREAT|O_DIRECT, 0666) = 8
write(8, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
write(1, "main line 94 malloc success, p2="..., 48main line 94 malloc success, p2=0x563b08a0f000.
) = 48
openat(AT_FDCWD, "./new.bin", O_RDWR|O_CREAT|O_DIRECT, 0666) = 8
write(8, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
write(1, "main line 107 malloc success, p3"..., 49main line 107 malloc success, p3=0x563b08a0f000.
) = 49
write(1, "================================"..., 67==================================================================
) = 67
write(1, "================================"..., 67==================================================================
) = 67
openat(AT_FDCWD, "./new.bin", O_RDWR|O_CREAT|O_DIRECT, 0666) = 8
write(8, "\240\214c<\353\177\0\0\240\214c<\353\177\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = -1 EINVAL (Invalid argument)
write(1, "write_odirect_test line 31, writ"..., 65write_odirect_test line 31, write failure, err Invalid argument.
) = 65
write(1, "main line 68 malloc success, p=0"..., 47main line 68 malloc success, p=0x563b08a0e670.
) = 47
openat(AT_FDCWD, "./new.bin", O_RDWR|O_CREAT|O_DIRECT, 0666) = 9
write(9, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
write(1, "main line 81 malloc success, p1="..., 48main line 81 malloc success, p1=0x563b08a0f000.
) = 48
openat(AT_FDCWD, "./new.bin", O_RDWR|O_CREAT|O_DIRECT, 0666) = 9
write(9, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
write(1, "main line 94 malloc success, p2="..., 48main line 94 malloc success, p2=0x563b08a0f000.
) = 48
openat(AT_FDCWD, "./new.bin", O_RDWR|O_CREAT|O_DIRECT, 0666) = 9
write(9, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
write(1, "main line 107 malloc success, p3"..., 49main line 107 malloc success, p3=0x563b08a0f000.
) = 49
write(1, "================================"..., 67==================================================================
) = 67

同样的代码,在Tina上运行试试看,如下LOG, 非对齐的BUFFER竟然对O_DIRECT没有影响。

原因未知,下一步打算在虚拟机上看一下上面返回出错的具体原因,再来分析。

root@(none):/mnt/extsd# ./main
main line 71, pagesize 4096.
==================================================================
main line 87 malloc success, p=0xb6da2010.
main line 100 malloc success, p1=0xb6da2000.
main line 113 malloc success, p2=0xb6da2000.
main line 126 malloc success, p3=0xb6da2000.
==================================================================
==================================================================
main line 87 malloc success, p=0xb6da2010.
main line 100 malloc success, p1=0xb6da2000.
main line 113 malloc success, p2=0xb6da2000.
main line 126 malloc success, p3=0xb6da2000.
==================================================================
==================================================================
main line 87 malloc success, p=0xb6da2010.
main line 100 malloc success, p1=0xb6da2000.
main line 113 malloc success, p2=0xb6da2000.
main line 126 malloc success, p3=0xb6da2000.
==================================================================
==================================================================
main line 87 malloc success, p=0xb6da2010.
main line 100 malloc success, p1=0xb6da2000.
main line 113 malloc success, p2=0xb6da2000.
main line 126 malloc success, p3=0xb6da2000.
==================================================================
==================================================================
main line 87 malloc success, p=0xb6da2010.
main line 100 malloc success, p1=0xb6da2000.
main line 113 malloc success, p2=0xb6da2000.
main line 126 malloc success, p3=0xb6da2000.
==================================================================
==================================================================
main line 87 malloc success, p=0xb6da2010.
main line 100 malloc success, p1=0xb6da2000.
main line 113 malloc success, p2=0xb6da2000.
main line 126 malloc success, p3=0xb6da2000.
==================================================================
^C
root@(none):/mnt/extsd#

strace跟踪确实在地址非对齐的情况下,仍然成功写进去了:

root@(none):/mnt/extsd# strace -e trace=open,openat,write ./main
open("/usr/lib/eyesee-mpp/libgcc_s.so.1", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
open("/etc/ld-musl-armhf.path", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
open("/lib/libgcc_s.so.1", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 3
main line 71, pagesize 4096.
==================================================================
open("./new.bin", O_RDWR|O_CREAT|O_DIRECT|O_LARGEFILE, 0666) = 3
write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
main line 87 malloc success, p=0xb6df5010.
open("./new.bin", O_RDWR|O_CREAT|O_DIRECT|O_LARGEFILE, 0666) = 3
write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
main line 100 malloc success, p1=0xb6df5000.
open("./new.bin", O_RDWR|O_CREAT|O_DIRECT|O_LARGEFILE, 0666) = 3
write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
main line 113 malloc success, p2=0xb6df5000.
open("./new.bin", O_RDWR|O_CREAT|O_DIRECT|O_LARGEFILE, 0666) = 3
write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
main line 126 malloc success, p3=0xb6df5000.
==================================================================
==================================================================
open("./new.bin", O_RDWR|O_CREAT|O_DIRECT|O_LARGEFILE, 0666) = 3
write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
main line 87 malloc success, p=0xb6df5010.
open("./new.bin", O_RDWR|O_CREAT|O_DIRECT|O_LARGEFILE, 0666) = 3
write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
main line 100 malloc success, p1=0xb6df5000.
open("./new.bin", O_RDWR|O_CREAT|O_DIRECT|O_LARGEFILE, 0666) = 3
write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
main line 113 malloc success, p2=0xb6df5000.
open("./new.bin", O_RDWR|O_CREAT|O_DIRECT|O_LARGEFILE, 0666) = 3
write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
main line 126 malloc success, p3=0xb6df5000.
==================================================================
==================================================================
open("./new.bin", O_RDWR|O_CREAT|O_DIRECT|O_LARGEFILE, 0666) = 3
write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
main line 87 malloc success, p=0xb6df5010.
open("./new.bin", O_RDWR|O_CREAT|O_DIRECT|O_LARGEFILE, 0666) = 3
write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
main line 100 malloc success, p1=0xb6df5000.
open("./new.bin", O_RDWR|O_CREAT|O_DIRECT|O_LARGEFILE, 0666) = 3
write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
main line 113 malloc success, p2=0xb6df5000.
open("./new.bin", O_RDWR|O_CREAT|O_DIRECT|O_LARGEFILE, 0666) = 3
write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576
main line 126 malloc success, p3=0xb6df5000.
==================================================================
^Cstrace: Process 948 detached

root@(none):/mnt/extsd#

回头给出上面疑问的分析,在内核中加入调试打印,重新编译内核,重启动UBUNTU

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 434cffcc0..c4eea337c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1190,7 +1190,13 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
blkbits = blksize_bits(bdev_logical_block_size(bdev));
blocksize_mask = (1 << blkbits) - 1;
if (align & blocksize_mask)
+ {
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.align 0x%lx.mask 0x%x, blkbits %d\n", __func__, __LINE__, align, blocksize_mask, blkbits);
+ }
goto out;
+ }
}

/* watch out for a 0 len io from a tricksy fs */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 1513e90fb..298b9ac3d 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -226,7 +226,14 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t ret;

if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ {
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.\n", __func__, __LINE__);
+ }
+
return -EIO;
+ }

#ifdef CONFIG_FS_DAX
if (IS_DAX(inode))
@@ -235,7 +242,15 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)

if (!inode_trylock(inode)) {
if (iocb->ki_flags & IOCB_NOWAIT)
+ {
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.\n", __func__, __LINE__);
+ }
+
return -EAGAIN;
+ }
+
inode_lock(inode);
}

@@ -268,6 +283,11 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
}

ret = __generic_file_write_iter(iocb, from);
+
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, ret);
+ }
/*
* Unaligned direct AIO must be the only IO in flight. Otherwise
* overlapping aligned IO after unaligned might result in data
@@ -280,10 +300,19 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret > 0)
ret = generic_write_sync(iocb, ret);

+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, ret);
+ }
+
return ret;

out:
inode_unlock(inode);
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, ret);
+ }
return ret;
}

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1429d01d8..fd43ed1b4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3763,9 +3763,17 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
get_block_func = ext4_dio_get_block_unwritten_async;
dio_flags = DIO_LOCKING;
}
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, ret);
+ }
ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
get_block_func, ext4_end_io_dio, NULL,
dio_flags);
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, ret);
+ }

if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN)) {
diff --git a/fs/read_write.c b/fs/read_write.c
index 7458fccc5..2db56b455 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -490,12 +490,20 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
static ssize_t __vfs_write(struct file *file, const char __user *p,
size_t count, loff_t *pos)
{
+ ssize_t ret;
if (file->f_op->write)
- return file->f_op->write(file, p, count, pos);
+ ret = file->f_op->write(file, p, count, pos);
else if (file->f_op->write_iter)
- return new_sync_write(file, p, count, pos);
+ ret = new_sync_write(file, p, count, pos);
else
- return -EINVAL;
+ ret = -EINVAL;
+
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d. ret = %ld\n", __func__, __LINE__, ret);
+ }
+
+ return ret;
}

ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
diff --git a/mm/filemap.c b/mm/filemap.c
index c10e237cc..2b287f146 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3189,6 +3189,10 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
} else {
written = filemap_write_and_wait_range(mapping, pos,
pos + write_len - 1);
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, written);
+ }
if (written)
goto out;
}
@@ -3208,10 +3212,22 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
if (written) {
if (written == -EBUSY)
return 0;
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, written);
+ }
goto out;
}

+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, written);
+ }
written = mapping->a_ops->direct_IO(iocb, from);
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, written);
+ }

/*
* Finally, try again to invalidate clean pages which might have been
@@ -3385,16 +3401,34 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
current->backing_dev_info = inode_to_bdi(inode);
err = file_remove_privs(file);
if (err)
+ {
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, err);
+ }
goto out;
+ }

err = file_update_time(file);
if (err)
+ {
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, err);
+ }
goto out;
+ }

if (iocb->ki_flags & IOCB_DIRECT) {
loff_t pos, endbyte;

+
written = generic_file_direct_write(iocb, from);
+
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, written);
+ }
/*
* If the write stopped short of completing, fall back to
* buffered writes. Some filesystems do this for writes to
@@ -3415,6 +3449,10 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
*/
if (unlikely(status < 0)) {
err = status;
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, err);
+ }
goto out;
}
/*
@@ -3435,6 +3473,10 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
* We don't know how much we wrote, so just return
* the number of bytes which were direct-written
*/
+ if(strcmp("a.out", current->comm) == 0)
+ {
+ printk("%s line %d.ret %ld.\n", __func__, __LINE__, err);
+ }
}
} else {
written = generic_perform_write(file, from, iocb->ki_pos);

应用LOG如下:

GNU系统下内存对齐分配的记录_c语言_09

DMESG得到LOG如下:

[   31.506164] generic_file_direct_write line 3194.ret 0.
[ 31.506166] generic_file_direct_write line 3224.ret 0.
[ 31.506171] ext4_direct_IO_write line 3768.ret 0.
[ 31.506174] do_blockdev_direct_IO line 1196.align 0x5617a07d2670.mask 0x1ff, blkbits 9
[ 31.506176] ext4_direct_IO_write line 3775.ret -22.
[ 31.506201] generic_file_direct_write line 3229.ret -22.
[ 31.506202] __generic_file_write_iter line 3430.ret -22.
[ 31.506203] ext4_file_write_iter line 289.ret -22.
[ 31.506205] ext4_file_write_iter line 305.ret -22.
[ 31.506206] __vfs_write line 503. ret = -22
[ 31.506227] __vfs_write line 503. ret = 65
[ 31.506235] __vfs_write line 503. ret = 47
[ 32.506385] __vfs_write line 503. ret = 67
[ 32.508763] generic_file_direct_write line 3194.ret 0.
[ 32.508765] generic_file_direct_write line 3224.ret 0.
[ 32.508771] ext4_direct_IO_write line 3768.ret 0.
[ 32.508774] do_blockdev_direct_IO line 1196.align 0x5617a07d2670.mask 0x1ff, blkbits 9
[ 32.508776] ext4_direct_IO_write line 3775.ret -22.
[ 32.508803] generic_file_direct_write line 3229.ret -22.
[ 32.508804] __generic_file_write_iter line 3430.ret -22.
[ 32.508806] ext4_file_write_iter line 289.ret -22.
[ 32.508807] ext4_file_write_iter line 305.ret -22.
[ 32.508808] __vfs_write line 503. ret = -22
[ 32.508832] __vfs_write line 503. ret = 65
[ 32.508840] __vfs_write line 503. ret = 47

可以看到,内核打印的ALIGN和用户态的指针完全相同,对齐的MASK为(1<<9) - 1 = 0x1ff = 511

也就是512字节对齐,其并不满足。返回的位置在下图所示,这说明想用direct io的话,传递给 read / write系统调用的buffer必须按照512字节对齐。

更多的验证:

GNU系统下内存对齐分配的记录_#include_10

复杂的对齐逻辑可以看下面这个函数

align的判断逻辑是或逻辑,也就是说,buffer指针,文件offset,以及读写大小,都必须512字节对齐才行,任何一个不对齐,就会触发下面的逻辑。

GNU系统下内存对齐分配的记录_c++_11

判断代码:

判断ptr指针对齐的逻辑,文件偏移为0,写1M数据.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <limits.h>
#include <string.h>
#include <math.h>

#define __USE_GNU 1
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <errno.h>

#define DALLOC_SIZE (1 * 1024 * 1024)
/*
 * Write DALLOC_SIZE bytes from p to ./new.bin through an O_DIRECT descriptor,
 * then close the descriptor and delete the file.
 *
 * p: source buffer.  It is handed to write() unchanged, so the alignment of
 *    its address is what this test exercises.
 *
 * Returns 0 when the full write and the unlink both succeed, and -1 when
 * open, write or unlink fails (each failure is also reported on stdout).
 */
static int write_odirect_test(unsigned char *p)
{
    int rc = 0;

    int fdno = open("./new.bin", O_DIRECT|O_RDWR|O_CREAT, 0666);
    if (fdno < 0) {
        printf("%s line %d, open file failure.\n", __func__, __LINE__);
        return -1;
    }

    /*
     * Mode 1 is FALLOC_FL_KEEP_SIZE on Linux: reserve 100 MiB of blocks
     * without changing the file size.  Best effort, the result is ignored.
     */
    fallocate(fdno, 1, 0, 100*1024*1024);

    if (write(fdno, p, DALLOC_SIZE) != DALLOC_SIZE) {
        /* Save errno first so the calls made while reporting cannot clobber it. */
        int err = errno;

        printf("%s line %d, write failure, err %s.\n", __func__, __LINE__, strerror(err));
        rc = -1;
    }

    /*
     * Close and unlink on every path, including a failed write, so repeated
     * calls neither leak descriptors nor leave the test file behind.
     */
    close(fdno);

    if (unlink("./new.bin") < 0) {
        printf("%s line %d unlink errpr!\n", __func__, __LINE__);
        rc = -1;
    }

    return rc;
}

/*
 * Pointer-alignment sweep: for each power-of-two alignment from 8 to 262144
 * bytes, allocate DALLOC_SIZE bytes with posix_memalign, run
 * write_odirect_test() on the buffer and report whether it succeeded.
 *
 * Returns -1 if an allocation fails, 0 after all 16 alignments were tried.
 * argc and argv are unused.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    printf("%s line %d, pagesize %d.\n", __func__, __LINE__, getpagesize());

    for (int i = 0; i < 16; i++) {
        /* 2^(i+3) computed with an integer shift instead of floating-point pow(). */
        const int align = 1 << (i + 3);
        void *p2 = NULL;

        printf("=============================%d=================================\n", align);

        int ret = posix_memalign(&p2, align, DALLOC_SIZE);
        if (ret != 0) {
            printf("%s line %d, malloc failure.\n", __func__, __LINE__);
            return -1;
        }

        printf("%s line %d malloc success, p2=%p.\n", __func__, __LINE__, p2);
        if (write_odirect_test(p2) != 0) {
            printf("%s line %d, align %d, failure.\n", __func__, __LINE__, align);
        } else {
            printf("%s line %d, align %d, success.\n", __func__, __LINE__, align);
        }

        free(p2);

        printf("==================================================================\n");
    }

    sleep(1);
    return 0;
}
caozilong@AwExdroid65:~/WorkSpace/alloc_memory$ ./a.out 
main line 51, pagesize 4096.
=============================8=================================
main line 64 malloc success, p2=0x7f7403891010.
write_odirect_test line 31, write failure, err Invalid argument.
main line 67, align 8, failure.
==================================================================
=============================16=================================
main line 64 malloc success, p2=0x118f010.
write_odirect_test line 31, write failure, err Invalid argument.
main line 67, align 16, failure.
==================================================================
=============================32=================================
main line 64 malloc success, p2=0x118f040.
write_odirect_test line 31, write failure, err Invalid argument.
main line 67, align 32, failure.
==================================================================
=============================64=================================
main line 64 malloc success, p2=0x118f040.
write_odirect_test line 31, write failure, err Invalid argument.
main line 67, align 64, failure.
==================================================================
=============================128=================================
main line 64 malloc success, p2=0x118f080.
write_odirect_test line 31, write failure, err Invalid argument.
main line 67, align 128, failure.
==================================================================
=============================256=================================
main line 64 malloc success, p2=0x118f100.
write_odirect_test line 31, write failure, err Invalid argument.
main line 67, align 256, failure.
==================================================================
=============================512=================================
main line 64 malloc success, p2=0x118f200.
main line 71, align 512, success.
==================================================================
=============================1024=================================
main line 64 malloc success, p2=0x118f400.
main line 71, align 1024, success.
==================================================================
=============================2048=================================
main line 64 malloc success, p2=0x118f800.
main line 71, align 2048, success.
==================================================================
=============================4096=================================
main line 64 malloc success, p2=0x1190000.
main line 71, align 4096, success.
==================================================================
=============================8192=================================
main line 64 malloc success, p2=0x1190000.
main line 71, align 8192, success.
==================================================================
=============================16384=================================
main line 64 malloc success, p2=0x1190000.
main line 71, align 16384, success.
==================================================================
=============================32768=================================
main line 64 malloc success, p2=0x1190000.
main line 71, align 32768, success.
==================================================================
=============================65536=================================
main line 64 malloc success, p2=0x1190000.
main line 71, align 65536, success.
==================================================================
=============================131072=================================
main line 64 malloc success, p2=0x11a0000.
main line 71, align 131072, success.
==================================================================
=============================262144=================================
main line 64 malloc success, p2=0x7f7403880000.
main line 71, align 262144, success.
==================================================================
caozilong@AwExdroid65:~/WorkSpace/alloc_memory$

判断文件pos的逻辑,ptr 512对齐,写1M数据.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <limits.h>
#include <string.h>
#include <math.h>

#define __USE_GNU 1
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <errno.h>

#define DALLOC_SIZE (1 * 1024 * 1024)
/*
 * File-offset sweep: open ./new.bin with O_DIRECT and, for every byte offset
 * from 0 to 4096 inclusive, seek there and try to write DALLOC_SIZE bytes
 * from p.  Only successful writes are reported; failed ones are skipped
 * silently on purpose.  The file is deleted afterwards.
 *
 * Returns 0 on completion, -1 if the file cannot be opened or unlinked.
 */
static int write_odirect_test(unsigned char *p)
{
    const int fd = open("./new.bin", O_DIRECT|O_RDWR|O_CREAT, 0666);
    if (fd < 0) {
        printf("%s line %d, open file failure.\n", __func__, __LINE__);
        return -1;
    }

    fallocate(fd, 1, 0, 100*1024*1024);

    int offset = 0;
    while (offset <= 4096) {
        lseek(fd, offset, SEEK_SET);

        /* Read the position back so the log shows where the write really starts. */
        const int reported = lseek(fd, 0, SEEK_CUR);

        if (write(fd, p, DALLOC_SIZE) == DALLOC_SIZE) {
            printf("%s line %d, write success, off %d, cur %d.\n", __func__, __LINE__, offset, reported);
        }

        offset++;
    }

    close(fd);

    if (unlink("./new.bin") >= 0) {
        return 0;
    }

    printf("%s line %d unlink errpr!\n", __func__, __LINE__);
    return -1;
}

/* Allocates a DALLOC_SIZE buffer aligned to 512 bytes, runs write_odirect_test()
 * on it and prints the verdict. Returns 0, or -1 if the allocation fails. */
int main(int argc, char **argv)
{
    const int align = 512;  /* the only alignment exercised by this program */
    void *buf = NULL;
    printf("%s line %d, pagesize %d.\n", __func__, __LINE__, getpagesize());

    (void)argc;  /* command-line arguments are not used */
    (void)argv;

    printf("=============================%d=================================\n", align);

    /* A non-zero return from posix_memalign() means no buffer was obtained. */
    int ret = posix_memalign(&buf, align, DALLOC_SIZE);
    if (ret != 0) {
        printf("%s line %d, malloc failure.\n", __func__, __LINE__);
        return -1;
    }

    printf("%s line %d malloc success, p2=%p.\n", __func__, __LINE__, buf);
    int verdict = write_odirect_test(buf);
    if (verdict != 0) {
        printf("%s line %d, align %d, failure.\n", __func__, __LINE__, align);
    }

    if (verdict == 0) {
        printf("%s line %d, align %d, success.\n", __func__, __LINE__, align);
    }

    free(buf);

    /* Closing rule for this run's output. */
    printf("==================================================================\n");

    /* Pause for a second before exiting. */
    sleep(1);
    return 0;
}
caozilong@AwExdroid65:~/WorkSpace/alloc_memory$ ./a.out 
main line 64, pagesize 4096.
=============================512=================================
main line 78 malloc success, p2=0x7f6d6ed52200.
write_odirect_test line 43, write success, off 0, cur 0.
write_odirect_test line 43, write success, off 512, cur 512.
write_odirect_test line 43, write success, off 1024, cur 1024.
write_odirect_test line 43, write success, off 1536, cur 1536.
write_odirect_test line 43, write success, off 2048, cur 2048.
write_odirect_test line 43, write success, off 2560, cur 2560.
write_odirect_test line 43, write success, off 3072, cur 3072.
write_odirect_test line 43, write success, off 3584, cur 3584.
write_odirect_test line 43, write success, off 4096, cur 4096.
main line 85, align 512, success.
==================================================================
caozilong@AwExdroid65:~/WorkSpace/alloc_memory$

可以看到,position也必须512字节对齐.

长度测试,ptr512对齐,pos512对齐,只有长度变化,可以看到,也要求512字节对齐.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <limits.h>
#include <string.h>
#include <math.h>

#define __USE_GNU 1
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <errno.h>

#define DALLOC_SIZE (1 * 1024 * 1024)
/* Probes which write lengths O_DIRECT accepts: for every length 0..4096, rewinds
 * ./new.bin to offset 0 and writes that many bytes from p, printing each success.
 * Returns 0 when the sweep finishes, -1 if the file cannot be opened or unlinked. */
static int write_odirect_test(unsigned char *p)
{
    int fd = open("./new.bin", O_DIRECT|O_RDWR|O_CREAT, 0666);
    if (fd < 0) {
        printf("%s line %d, open file failure.\n", __func__, __LINE__);
        return -1;
    }

    /* Mode 1 (FALLOC_FL_KEEP_SIZE on Linux) reserves space only; result is not checked. */
    fallocate(fd, 1, 0, 100*1024*1024);

    for (int nbytes = 0; nbytes <= 4096; nbytes++) {
        lseek(fd, 0, SEEK_SET);

        /* Read the position back so the report shows where the write started. */
        int where = lseek(fd, 0, SEEK_CUR);

        /* Writes that do not transfer exactly nbytes are skipped silently;
         * the sweep is meant to find out which lengths are accepted. */
        if (write(fd, p, nbytes) != nbytes) {
            continue;
        }

        /* Report the accepted length together with the position read back. */
        printf("%s line %d, write success, len %d, cur %d.\n", __func__, __LINE__, nbytes, where);
    }

    close(fd);

    /* The scratch file is removed once the sweep is over. */
    int removed = unlink("./new.bin");
    if (removed < 0) {
        printf("%s line %d unlink errpr!\n", __func__, __LINE__);
        return -1;
    }

    return 0;
}

/* Allocates a DALLOC_SIZE buffer aligned to 512 bytes, runs write_odirect_test()
 * on it and prints the verdict. Returns 0, or -1 if the allocation fails. */
int main(int argc, char **argv)
{
    const int align = 512;  /* the only alignment exercised by this program */
    void *buf = NULL;
    printf("%s line %d, pagesize %d.\n", __func__, __LINE__, getpagesize());

    (void)argc;  /* command-line arguments are not used */
    (void)argv;

    printf("=============================%d=================================\n", align);

    /* A non-zero return from posix_memalign() means no buffer was obtained. */
    int ret = posix_memalign(&buf, align, DALLOC_SIZE);
    if (ret != 0) {
        printf("%s line %d, malloc failure.\n", __func__, __LINE__);
        return -1;
    }

    printf("%s line %d malloc success, p2=%p.\n", __func__, __LINE__, buf);
    int verdict = write_odirect_test(buf);
    if (verdict != 0) {
        printf("%s line %d, align %d, failure.\n", __func__, __LINE__, align);
    }

    if (verdict == 0) {
        printf("%s line %d, align %d, success.\n", __func__, __LINE__, align);
    }

    free(buf);

    /* Closing rule for this run's output. */
    printf("==================================================================\n");

    /* Pause for a second before exiting. */
    sleep(1);
    return 0;
}
czl@czl-VirtualBox:~/WorkSpace/changdu$ ./a.out 
main line 64, pagesize 4096.
=============================512=================================
main line 78 malloc success, p2=0x7f070f72b200.
write_odirect_test line 43, write success, len 0, cur 0.
write_odirect_test line 43, write success, len 512, cur 0.
write_odirect_test line 43, write success, len 1024, cur 0.
write_odirect_test line 43, write success, len 1536, cur 0.
write_odirect_test line 43, write success, len 2048, cur 0.
write_odirect_test line 43, write success, len 2560, cur 0.
write_odirect_test line 43, write success, len 3072, cur 0.
write_odirect_test line 43, write success, len 3584, cur 0.
write_odirect_test line 43, write success, len 4096, cur 0.
main line 85, align 512, success.
==================================================================
czl@czl-VirtualBox:~/WorkSpace/changdu$

总之,在 Ubuntu 上,O_DIRECT 模式下的写操作要求 buffer 地址、文件 position 以及写入长度三者都按 512 字节对齐,即都是 512 字节的整数倍。

读应该也是如此,需要验证。下面我们仅验证 LEN 不按 512 字节对齐的情况。注意这里 fallocate 的 mode 要设置为 0,这样才会把文件的 SIZE 设置为 truncate 之后的大小;如果 mode 为 1,SIZE 仍然是 0,这不会对写测试造成影响,但是会对读造成影响。

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <limits.h>
#include <string.h>
#include <math.h>

#define __USE_GNU 1
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <errno.h>

#define DALLOC_SIZE (1 * 1024 * 1024)
#define ALLOCA_SIZE (100*1024*1024)
/* Despite its name this variant reads: for every length 0..4096 it rewinds
 * ./new.bin to offset 0 and issues an O_DIRECT read of that many bytes into p,
 * printing each success. Returns 0, or -1 if the file cannot be opened or unlinked. */
static int write_odirect_test(unsigned char *p)
{
    int fd = open("./new.bin", O_DIRECT|O_RDWR|O_CREAT, 0666);
    if (fd < 0) {
        printf("%s line %d, open file failure.\n", __func__, __LINE__);
        return -1;
    }

    /* Mode 0 (plain allocation) also grows the file size to ALLOCA_SIZE, so the
     * reads below have data to return; the result is not checked. */
    fallocate(fd, 0, 0, ALLOCA_SIZE);

    for (int nbytes = 0; nbytes <= 4096; nbytes++) {
        lseek(fd, 0, SEEK_SET);

        /* Read the position back so the report shows where the read started. */
        int where = lseek(fd, 0, SEEK_CUR);

        /* Reads that do not return exactly nbytes are skipped silently;
         * the sweep is meant to find out which lengths are accepted. */
        if (read(fd, p, nbytes) != nbytes) {
            continue;
        }

        /* Report the accepted length, the position read back and the
         * destination buffer address. */
        printf("%s line %d, read success, len %d. cur %d ptr %p.\n", __func__, __LINE__, nbytes, where, p);
    }

    close(fd);

    /* The scratch file is removed once the sweep is over. */
    int removed = unlink("./new.bin");
    if (removed < 0) {
        printf("%s line %d unlink errpr!\n", __func__, __LINE__);
        return -1;
    }

    return 0;
}

/* Allocates a DALLOC_SIZE buffer aligned to 512 bytes, runs write_odirect_test()
 * on it and prints the verdict. Returns 0, or -1 if the allocation fails. */
int main(int argc, char **argv)
{
    const int align = 512;  /* the only alignment exercised by this program */
    void *buf = NULL;
    printf("%s line %d, pagesize %d.\n", __func__, __LINE__, getpagesize());

    (void)argc;  /* command-line arguments are not used */
    (void)argv;

    printf("=============================%d=================================\n", align);

    /* A non-zero return from posix_memalign() means no buffer was obtained. */
    int ret = posix_memalign(&buf, align, DALLOC_SIZE);
    if (ret != 0) {
        printf("%s line %d, malloc failure.\n", __func__, __LINE__);
        return -1;
    }

    printf("%s line %d malloc success, p2=%p.\n", __func__, __LINE__, buf);
    int verdict = write_odirect_test(buf);
    if (verdict != 0) {
        printf("%s line %d, align %d, failure.\n", __func__, __LINE__, align);
    }

    if (verdict == 0) {
        printf("%s line %d, align %d, success.\n", __func__, __LINE__, align);
    }

    free(buf);

    /* Closing rule for this run's output. */
    printf("==================================================================\n");

    /* Pause for a second before exiting. */
    sleep(1);
    return 0;
}
czl@czl-VirtualBox:~/WorkSpace/changdu$ ./a.out 
main line 67, pagesize 4096.
=============================512=================================
main line 81 malloc success, p2=0x7f6fc17ac200.
write_odirect_test line 46, read success, len 0. cur 0 ptr 0x7f6fc17ac200.
write_odirect_test line 46, read success, len 512. cur 0 ptr 0x7f6fc17ac200.
write_odirect_test line 46, read success, len 1024. cur 0 ptr 0x7f6fc17ac200.
write_odirect_test line 46, read success, len 1536. cur 0 ptr 0x7f6fc17ac200.
write_odirect_test line 46, read success, len 2048. cur 0 ptr 0x7f6fc17ac200.
write_odirect_test line 46, read success, len 2560. cur 0 ptr 0x7f6fc17ac200.
write_odirect_test line 46, read success, len 3072. cur 0 ptr 0x7f6fc17ac200.
write_odirect_test line 46, read success, len 3584. cur 0 ptr 0x7f6fc17ac200.
write_odirect_test line 46, read success, len 4096. cur 0 ptr 0x7f6fc17ac200.
main line 88, align 512, success.
==================================================================
czl@czl-VirtualBox:~/WorkSpace/changdu$

可以看到,LEN仍然是要512对齐的!

在Tina下验证:

由于tina下的ftruncate语义貌似不同,所以手工创建一个100M的文件供测试用例读写,否则,一个默认 size 为 0 的文件会触发内核下面的 return 逻辑。窃以为造成这种差异的主要原因是文件系统的不同:在PC上,测试用例访问的是ext4文件系统,进入的是ext4_direct_IO逻辑;而在Tina上,TF卡是FAT文件系统,访问的是fat_direct_IO,内部会有如下的逻辑判断。而且通过MSDOS_I宏可以明显看出,这个 mmu_private 变量是FAT文件系统私有的,其它的文件系统没有。

GNU系统下内存对齐分配的记录_c++_12

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <limits.h>
#include <string.h>
#include <math.h>

#define __USE_GNU 1
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <errno.h>

#define DALLOC_SIZE (1 * 1024 * 1024)
/* Offset sweep for the Tina/FAT run: writes DALLOC_SIZE bytes from p at every
 * byte offset 0..4096 of an already existing ./new.bin and prints the offsets
 * that succeed. Returns 0 when the sweep finishes, -1 if the file cannot be
 * opened. */
static int write_odirect_test(unsigned char *p)
{
    int fd = open("./new.bin", O_DIRECT|O_RDWR, 0666);
    if (fd < 0) {
        printf("%s line %d, open file failure.\n", __func__, __LINE__);
        return -1;
    }

    /* This variant neither creates nor preallocates the file: ./new.bin must
     * already exist (the write-up creates a 100 MB file by hand beforehand). */

    /* Sweep every byte offset so that aligned and unaligned starting positions
     * are both exercised. */
    for (int offset = 0; offset <= 4096; offset++) {
        lseek(fd, offset, SEEK_SET);

        /* Read the position back so the report shows where the write started. */
        int where = lseek(fd, 0, SEEK_CUR);

        /* Writes that do not transfer the full DALLOC_SIZE are skipped silently. */
        if (write(fd, p, DALLOC_SIZE) != DALLOC_SIZE) {
            continue;
        }

        /* The whole buffer was written at this offset: report the requested
         * offset together with the position that was read back before the
         * write was issued. */
        printf("%s line %d, write success, off %d, cur %d.\n", __func__, __LINE__, offset, where);
    }

    close(fd);

    /* Removal of the test file is disabled in this variant, so ./new.bin is
     * left in place after the run. */
#if 0
    if (unlink("./new.bin") < 0) {
        printf("%s line %d unlink errpr!\n", __func__, __LINE__);
        return -1;
    }
#endif

    return 0;
}

/* Allocates a DALLOC_SIZE buffer aligned to 512 bytes, runs write_odirect_test()
 * on it and prints the verdict. Returns 0, or -1 if the allocation fails. */
int main(int argc, char **argv)
{
    const int align = 512;  /* the only alignment exercised by this program */
    void *buf = NULL;
    printf("%s line %d, pagesize %d.\n", __func__, __LINE__, getpagesize());

    (void)argc;  /* command-line arguments are not used */
    (void)argv;

    printf("=============================%d=================================\n", align);

    /* A non-zero return from posix_memalign() means no buffer was obtained. */
    int ret = posix_memalign(&buf, align, DALLOC_SIZE);
    if (ret != 0) {
        printf("%s line %d, malloc failure.\n", __func__, __LINE__);
        return -1;
    }

    printf("%s line %d malloc success, p2=%p.\n", __func__, __LINE__, buf);
    int verdict = write_odirect_test(buf);
    if (verdict != 0) {
        printf("%s line %d, align %d, failure.\n", __func__, __LINE__, align);
    }

    if (verdict == 0) {
        printf("%s line %d, align %d, success.\n", __func__, __LINE__, align);
    }

    free(buf);

    /* Closing rule for this run's output. */
    printf("==================================================================\n");

    /* Pause for a second before exiting. */
    sleep(1);
    return 0;
}

GNU系统下内存对齐分配的记录_#define_13


最后的问题:

文件大小如果不是512字节的整数倍,当读最后一笔数据时,len还一定要512对齐吗?

先说答案:要,一定要。前面的逻辑也说明了,判断对齐的条件只看 ptr、length、offset 三者按位或的结果,任何一个参数不对齐,都会触发返回,用户态会收到 Invalid argument(EINVAL)错误。

那内核是怎么解决这个问题的呢?答案是:虽然三个参数都必须对齐,但是 read 返回的不是对齐后的请求大小,而是最后一笔数据的实际长度,这样就没问题了。

验证用例:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <limits.h>
#include <string.h>
#include <math.h>

#define __USE_GNU 1
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <errno.h>

#define DALLOC_SIZE (1 * 1024 * 1024)
#define ALLOCA_SIZE (100*1024*1024 + 100)
/* Reads ./new.bin front to back in 512-byte O_DIRECT requests into p to show what the
 * final read returns when the file size (ALLOCA_SIZE) is not a multiple of 512. The loop
 * only ends on a read that is not exactly 512 bytes, so the result is -1 (as before). */
static int write_odirect_test(unsigned char *p)
{
    int fdno = open("./new.bin", O_DIRECT|O_RDWR|O_CREAT, 0666);
    if (fdno < 0) {
        printf("%s line %d, open file failure.\n", __func__, __LINE__);
        return -1;
    }

    /* Mode 0 also grows the file size to ALLOCA_SIZE; the result is not checked. */
    fallocate(fdno, 0, 0, ALLOCA_SIZE);

    int status = 0;   /* becomes -1 once a read returns anything but 512 */
    int retlen = 0;
    int cur = 0;

    lseek(fdno, 0, SEEK_SET);
#if 1
    while (1) {
        cur = lseek(fdno, 0, SEEK_CUR);
        /* A short read or EOF does not set errno; clear it first so the message
         * below cannot show a stale error left by an earlier call. */
        errno = 0;
        retlen = read(fdno, p, 512);
        if (retlen != 512) {
            printf("%s line %d, read failure, err %s, cur 0x%x retlen %d.\n", __func__, __LINE__, strerror(errno), cur,retlen);
            status = -1;   /* fall through to close/unlink instead of leaking fdno */
            break;
        }

        printf("%s line %d, read success, len %d. cur 0x%x ptr %p, retlen %d.\n", __func__, __LINE__, 512, cur, p, retlen);
    }

#else
    lseek(fdno, 0x6400000, SEEK_SET);
    cur = lseek(fdno, 0, SEEK_CUR);
    retlen = read(fdno, p, 512);

    printf("%s line %d, cur 0x%x, retlen %d.tips:%s.\n", __func__, __LINE__, cur, retlen, strerror(errno));
#endif

    close(fdno);

    /* Remove the scratch file on every path, including after the final short read. */
    if (unlink("./new.bin") < 0) {
        printf("%s line %d unlink errpr!\n", __func__, __LINE__);
        return -1;
    }

    return status;
}

/* Allocates a DALLOC_SIZE buffer aligned to 512 bytes, runs write_odirect_test()
 * on it and prints the verdict. Returns 0, or -1 if the allocation fails. */
int main(int argc, char **argv)
{
    const int align = 512;  /* the only alignment exercised by this program */
    void *buf = NULL;
    printf("%s line %d, pagesize %d.\n", __func__, __LINE__, getpagesize());

    (void)argc;  /* command-line arguments are not used */
    (void)argv;

    printf("=============================%d=================================\n", align);

    /* A non-zero return from posix_memalign() means no buffer was obtained. */
    int ret = posix_memalign(&buf, align, DALLOC_SIZE);
    if (ret != 0) {
        printf("%s line %d, malloc failure.\n", __func__, __LINE__);
        return -1;
    }

    printf("%s line %d malloc success, p2=%p.\n", __func__, __LINE__, buf);
    int verdict = write_odirect_test(buf);
    if (verdict != 0) {
        printf("%s line %d, align %d, failure.\n", __func__, __LINE__, align);
    }

    if (verdict == 0) {
        printf("%s line %d, align %d, success.\n", __func__, __LINE__, align);
    }

    free(buf);

    /* Closing rule for this run's output. */
    printf("==================================================================\n");

    /* Pause for a second before exiting. */
    sleep(1);
    return 0;
}

运行结果,文件大小是100M+100字节,可以看到最后一笔实际上是读成功了,读出来的长度是100,正好是最后一笔非512对齐的数据大小。但是传递的参数都是512对齐的。

GNU系统下内存对齐分配的记录_c语言_14

造成fat和ext4表现差异的原因,关键在于不同的文件系统direct_IO函数指针的处理不同,可以按照下图的逻辑去分析。

GNU系统下内存对齐分配的记录_#include_15


结束! 

【版权声明】本文内容来自摩杜云社区用户原创、第三方投稿、转载,内容版权归原作者所有。本网站的目的在于传递更多信息,不拥有版权,亦不承担相应法律责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: cloudbbs@moduyun.com

上一篇: CentOS安装 GIT 下一篇: Chromium Browser Evaluation
  1. 分享:
最后一次编辑于 2023年11月08日 0

暂无评论

推荐阅读
  JBfJ5LpBD0AJ   2023年11月13日   23   0   0 初始化链表#define
  HE3leaVn7jMN   2023年11月24日   28   0   0 Timei++#include
  HE3leaVn7jMN   2023年11月26日   28   0   0 i++#include
  3M67F8YJLxn2   2023年11月13日   29   0   0 vimgitc++
qUJOhJyC3pLU