!502 feat:C库典型高频函数优化
Merge pull request !502 from Zhaotianyu/0731libc_opt
This commit is contained in:
commit
70a8901c72
|
@ -60,6 +60,13 @@ kernel_module(module_name) {
|
|||
"$OPTRTDIR/string/arm/strcmp.S",
|
||||
"$OPTRTDIR/string/arm/strcpy.c",
|
||||
"$OPTRTDIR/string/arm/strlen-armv6t2.S",
|
||||
"src/arch/arm/memset.S",
|
||||
"src/arch/arm/memcmp.S",
|
||||
]
|
||||
} else {
|
||||
sources += [
|
||||
"src/memset.c",
|
||||
"src/memcmp.c",
|
||||
]
|
||||
}
|
||||
|
||||
|
|
|
@ -9,6 +9,9 @@ MUSLPORTINGDIR = $(MUSLDIR)/porting/liteos_a/kernel
|
|||
OPTRTDIR = $(TOPDIR)/third_party/optimized-routines
|
||||
|
||||
MUSL_SRCS =
|
||||
LOCAL_OPT_DIR = src/arch/$(ARCH)
|
||||
LOCAL_OPT_SRCS =
|
||||
LOCAL_FILTER_SRCS =
|
||||
|
||||
MUSLPORTING_SRCS = \
|
||||
src/*/$(ARCH)/*.[csS] \
|
||||
|
@ -16,6 +19,13 @@ MUSLPORTING_SRCS = \
|
|||
|
||||
LOCAL_SRCS = $(wildcard $(addprefix $(MUSLPORTINGDIR)/,$(MUSLPORTING_SRCS)))
|
||||
LOCAL_SRCS := $(filter-out $(subst $(MUSLPORTINGDIR),$(MUSLDIR),$(LOCAL_SRCS)),$(addprefix $(MUSLDIR)/,$(MUSL_SRCS)) $(LOCAL_SRCS))
|
||||
LOCAL_SRCS += $(wildcard src/*.c src/*.S)
|
||||
# Sources optimized for specific architectures.
# $(wildcard) takes a single, whitespace-separated list of patterns. A comma
# between the patterns would become part of the first pattern ("*.c,") and
# silently prevent any .c file from matching.
LOCAL_OPT_SRCS := $(wildcard $(LOCAL_OPT_DIR)/*.c $(LOCAL_OPT_DIR)/*.S)
# Enumerate common src files with the same name as the optimized srcs, both in
# the musl porting string directory and in the local src directory.
LOCAL_FILTER_SRCS := $(addprefix $(MUSLPORTINGDIR)/src/string/,$(notdir $(LOCAL_OPT_SRCS)))
LOCAL_FILTER_SRCS += $(addprefix src/,$(notdir $(LOCAL_OPT_SRCS)))
# The generic implementations are always C files, so map foo.S -> foo.c.
# patsubst works per word and only rewrites the suffix.
LOCAL_FILTER_SRCS := $(patsubst %.S,%.c,$(LOCAL_FILTER_SRCS))
|
||||
|
||||
ifeq ($(LOSCFG_ARCH_ARM_VER), "armv7-a")
|
||||
LOCAL_SRCS := $(filter-out $(addprefix $(MUSLPORTINGDIR)/src/string/,memchr.c memcpy.c strcmp.c strcpy.c strlen.c), $(LOCAL_SRCS))
|
||||
|
@ -25,10 +35,11 @@ LOCAL_SRCS += \
|
|||
$(OPTRTDIR)/string/arm/strcmp.S \
|
||||
$(OPTRTDIR)/string/arm/strcpy.c \
|
||||
$(OPTRTDIR)/string/arm/strlen-armv6t2.S
|
||||
# Replace the general srcs of the same name with specially optimized srcs
|
||||
LOCAL_SRCS += $(LOCAL_OPT_SRCS)
|
||||
LOCAL_SRCS := $(filter-out $(LOCAL_FILTER_SRCS),$(LOCAL_SRCS))
|
||||
endif
|
||||
|
||||
LOCAL_SRCS += $(wildcard src/*.c src/*/*.c)
|
||||
|
||||
LOCAL_INCLUDE := \
|
||||
-I $(LITEOSTOPDIR)/syscall \
|
||||
-I $(LITEOSTOPDIR)/bsd/dev/random \
|
||||
|
|
|
@ -0,0 +1,207 @@
|
|||
/*
|
||||
* Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
* of conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
 * int memcmp(const void *str1, const void *str2, size_t count)
 *
 * ABI:      AAPCS, ARM (A32) encoding, NEON required.
 * In:       r0 = str1, r1 = str2, r2 = count
 * Out:      r0 = 0 when the first count bytes are equal, otherwise the
 *           difference (byte of str1) - (byte of str2), both zero-extended,
 *           at the first mismatching position.
 * Clobbers: r1, r2, r3, d0-d7, flags. r4-r7 are saved and restored.
 *
 * Strategy: compare 32/16/8/4/1 byte chunks. A chunk is "equal" when the
 * byte-wise difference vector is all zero. Once a chunk differs, the saved
 * difference words are used to halve the search range down to 4 bytes and a
 * byte loop then locates the exact position (a mismatch is guaranteed there,
 * so that loop needs no length check).
 *
 * All internal labels use the .L prefix so that they stay assembler-local and
 * are not emitted into the ELF symbol table.
 */
    .syntax unified
    .arch armv7-a
    .fpu neon
    .globl memcmp                       @ -- Begin function memcmp
    .p2align 2
    .type memcmp,%function
    .code 32                            @ memcmp
memcmp:
    .fnstart
    push    {r4, r5, r6, r7, lr}
    pld     [r0, #0]
    pld     [r1, #0]

    /* if (str1 == str2 || count == 0) return 0; */
    cmp     r0, r1
    cmpne   r2, #0
    beq     .Lreturn_0

    /* Cheap early exit when the very first byte already differs. */
    ldrb    r3, [r0]                    @ r3 = *str1
    ldrb    r4, [r1]                    @ r4 = *str2
    pld     [r0, #64]
    pld     [r1, #64]
    cmp     r3, r4
    subne   r0, r3, r4
    bne     .Lreturn

    /* Main loop: 32 bytes per iteration using NEON registers. */
.L32_byte_cmp:
    cmp     r2, #32
    blo     .L16_byte_cmp
    sub     r2, r2, #32
    vld1.8  {d0 - d3}, [r0]!
    vld1.8  {d4 - d7}, [r1]!
    vsub.i8 q0, q0, q2                  @ q0 = difference of bytes  0..15
    vsub.i8 q1, q1, q3                  @ q1 = difference of bytes 16..31
    pld     [r0, #64]
    pld     [r1, #64]

    vorr    d4, d0, d1                  @ d4 != 0 iff bytes  0..15 differ
    vorr    d5, d2, d3                  @ d5 != 0 iff bytes 16..31 differ
    vorr    d6, d4, d5                  @ d6 != 0 iff any of the 32 bytes differ
    vmov    r3, r4, d6
    orrs    r5, r3, r4
    beq     .L32_byte_cmp

    /*
     * Some byte in the last 32-byte chunk differs. Rewind both pointers to
     * the start of the chunk and pick the 16-byte half holding the mismatch.
     */
    vmov    r3, r4, d4
    sub     r0, r0, #32
    sub     r1, r1, #32
    orrs    r3, r3, r4                  @ Z set: first 16 bytes are equal
    addeq   r0, r0, #16
    addeq   r1, r1, #16
    beq     .L16_byte_diff_back
    vmov    r3, r4, d0                  @ r3:r4 = diff of bytes 0..7
    vmov    r5, r6, d1                  @ r5:r6 = diff of bytes 8..15
    b       .L16_byte_diff

.L16_byte_diff_back:
    vmov    r3, r4, d2                  @ r3:r4 = diff of bytes 16..23
    vmov    r5, r6, d3                  @ r5:r6 = diff of bytes 24..31

    /*
     * In:  r0/r1 -> start of a 16-byte window that contains a mismatch,
     *      r3:r4 = difference of its low 8 bytes, r5:r6 = of its high 8 bytes.
     */
.L16_byte_diff:
    orrs    r7, r3, r4
    bne     .L8_byte_diff               @ mismatch is in the low 8 bytes
    add     r0, r0, #8                  @ low 8 bytes equal: step to the high half
    add     r1, r1, #8
    mov     r3, r5                      @ r3 = diff of the first 4 bytes of that half

    /*
     * In:  r0/r1 -> start of an 8-byte window that contains a mismatch,
     *      r3 = difference of its first 4 bytes.
     */
.L8_byte_diff:
    cmp     r3, #0
    addeq   r0, r0, #4                  @ first 4 bytes equal: skip them
    addeq   r1, r1, #4

    /* A mismatch lies ahead for certain, so scan bytes without a counter. */
.L4_byte_diff:
    ldrb    r5, [r0], #1
    ldrb    r6, [r1], #1
    subs    r5, r5, r6
    beq     .L4_byte_diff
    mov     r0, r5
    b       .Lreturn

    /* Fewer than 32 bytes left: binary descent 16 -> 8 -> 4 -> bytes. */
.L16_byte_cmp:
    cmp     r2, #16
    blo     .L8_byte_cmp
    sub     r2, r2, #16
    vld1.8  {d0 - d1}, [r0]!
    vld1.8  {d4 - d5}, [r1]!
    vsub.i8 q0, q0, q2
    pld     [r0, #64]
    pld     [r1, #64]

    vorr    d4, d0, d1
    vmov    r3, r4, d4
    orrs    r3, r3, r4
    beq     .L8_byte_cmp
    sub     r0, r0, #16                 @ rewind to the start of the chunk
    sub     r1, r1, #16
    vmov    r3, r4, d0
    vmov    r5, r6, d1
    b       .L16_byte_diff

.L8_byte_cmp:
    cmp     r2, #8
    blo     .L4_byte_cmp
    sub     r2, r2, #8
    vld1.8  {d0}, [r0]!
    vld1.8  {d4}, [r1]!
    vsub.i8 d0, d0, d4

    vmov    r3, r4, d0
    orrs    r7, r3, r4
    beq     .L4_byte_cmp
    sub     r0, r0, #8                  @ rewind to the start of the chunk
    sub     r1, r1, #8
    b       .L8_byte_diff

.L4_byte_cmp:
    cmp     r2, #4
    blo     .Lless_4_byte_cmp
    sub     r2, r2, #4
    ldr     r3, [r0], #4
    ldr     r4, [r1], #4
    cmp     r3, r4
    beq     .Lless_4_byte_cmp
    sub     r0, r0, #4                  @ rewind to the start of the word
    sub     r1, r1, #4
    b       .L4_byte_diff

    /* Up to 3 trailing bytes, compared one at a time against the counter. */
.Lless_4_byte_cmp:
    cmp     r2, #0
    beq     .Lreturn_0
    sub     r2, r2, #1
    ldrb    r3, [r0], #1
    ldrb    r4, [r1], #1
    subs    r5, r3, r4
    beq     .Lless_4_byte_cmp
    mov     r0, r5
    b       .Lreturn

.Lreturn_0:
    mov     r0, #0
.Lreturn:
    pop     {r4, r5, r6, r7, pc}
.Lmemcmp_end:
    .size memcmp, .Lmemcmp_end - memcmp
    .cantunwind
    .fnend                              @ -- End function
|
|
@ -0,0 +1,128 @@
|
|||
/*
|
||||
* Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
* of conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
 * void *memset(void *address, int c, size_t count)
 *
 * ABI:      AAPCS, ARM (A32) encoding, NEON required.
 * In:       r0 = address, r1 = c (only the low byte is used), r2 = count
 * Out:      r0 = original address (never modified)
 * Clobbers: r2, r3, r12, d0-d7, flags. No stack is used.
 *
 * Strategy: store single bytes until the cursor is 8-byte aligned, then
 * 64 bytes per iteration from d0-d7, then a 32/16/8/4/1 byte tail.
 *
 * The instruction set is selected explicitly because the conditional
 * instructions below carry no IT blocks. All internal labels use the .L
 * prefix so that they are not emitted into the ELF symbol table.
 */
    .syntax unified
    .arch armv7-a
    .fpu neon
    .globl memset                       @ -- Begin function memset
    .p2align 2
    .type memset,%function
    .code 32                            @ memset
memset:
    .fnstart
    cmp     r2, #0
    bxeq    lr                          @ count == 0: r0 already holds the result
    vdup.8  q0, r1                      @ every byte lane of q0 = fill byte
    mov     r12, r0                     @ r12 = write cursor, r0 is kept for return

    /* Store single bytes until the cursor is 8-byte aligned. */
    ands    r3, r0, #7
    beq     .Lbulk_64
    rsb     r3, r3, #8                  @ r3 = unalignedCnt = 8 - (address & 7)
    cmp     r2, r3
    movlo   r3, r2                      @ never write more than count bytes
    sub     r2, r2, r3
.Lhead_loop:
    strb    r1, [r12], #1
    subs    r3, r3, #1                  @ r3 starts in 1..7
    bne     .Lhead_loop

    /*
     * Set 64 bytes each time using NEON registers. The count is biased by
     * -64 so that the carry flag of the subtraction is the loop condition.
     * The unsigned test (lo/hs) is valid for every size_t value and keeps
     * exact multiples of 64 inside this loop.
     */
.Lbulk_64:
    vmov    q1, q0                      @ q1 is also needed by the 32-byte tail
    subs    r2, r2, #64
    blo     .Ltail                      @ fewer than 64 bytes remain
    vmov    q2, q0
    vmov    q3, q0
.Lloop_64:
    vstmia  r12!, {d0 - d7}
    subs    r2, r2, #64
    bhs     .Lloop_64

    /* Undo the bias: r2 = number of bytes still to write (0..63). */
.Ltail:
    add     r2, r2, #64

    cmp     r2, #0
    beq     .Lreturn
    cmp     r2, #32
    blo     .Ltail_16
    sub     r2, r2, #32
    vstmia  r12!, {d0 - d3}

.Ltail_16:
    cmp     r2, #0
    beq     .Lreturn
    cmp     r2, #16
    blo     .Ltail_8
    sub     r2, r2, #16
    vstmia  r12!, {d0 - d1}

.Ltail_8:
    cmp     r2, #0
    beq     .Lreturn
    cmp     r2, #8
    blo     .Ltail_4
    sub     r2, r2, #8
    vstmia  r12!, {d0}

.Ltail_4:
    cmp     r2, #0
    beq     .Lreturn
    cmp     r2, #4
    blo     .Ltail_bytes
    sub     r2, r2, #4
    vst1.32 {d0[0]}, [r12]!

.Ltail_bytes:
    cmp     r2, #0
    beq     .Lreturn
    strb    r1, [r12], #1
    sub     r2, r2, #1
    b       .Ltail_bytes

.Lreturn:
    bx      lr
.Lmemset_end:
    .size memset, .Lmemset_end - memset
    .cantunwind
    .fnend                              @ -- End function
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
* of conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Compare the first n bytes of str1 and str2.
 *
 * Returns 0 when the ranges are equal, otherwise the difference between the
 * first pair of mismatching bytes, both taken as unsigned char.
 *
 * Wide (8- and 4-byte) loads are used only to skip over equal data, and only
 * when both pointers are 8-byte aligned. Dereferencing a misaligned
 * uint64_t or uint32_t pointer is undefined behaviour and traps on
 * strict-alignment targets. The exact mismatch position and the return value
 * are always determined by the byte loop, so the result does not depend on
 * byte order.
 */
int memcmp(const void *str1, const void *str2, size_t n)
{
    const unsigned char *s1 = str1;
    const unsigned char *s2 = str2;

    if ((((uintptr_t)s1 | (uintptr_t)s2) & (sizeof(uint64_t) - 1)) == 0) {
        /* Skip equal 8-byte words. Both pointers stay 8-byte aligned. */
        while ((n >= 8) && (*(const uint64_t *)(s1) == *(const uint64_t *)(s2))) {
            s1 += 8;
            s2 += 8;
            n -= 8;
        }

        /*
         * Either fewer than 8 bytes remain or the next 8 bytes differ. In both
         * cases one equal 4-byte word may still be skipped, which leaves at
         * most 4 bytes for the byte loop to examine before it terminates.
         */
        if ((n >= 4) && (*(const uint32_t *)(s1) == *(const uint32_t *)(s2))) {
            s1 += 4;
            s2 += 4;
            n -= 4;
        }
    }

    /* Locate the first mismatching byte, if there is one. */
    for (; n && (*s1 == *s2); n--, s1++, s2++) {
    }
    return n ? *s1 - *s2 : 0;
}
|
|
@ -0,0 +1,107 @@
|
|||
/*
|
||||
* Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
* of conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Fill the first n bytes starting at dest with the low 8 bits of c and
 * return dest.
 *
 * Single bytes are written until the cursor is 8-byte aligned. The bulk is
 * then written as 64-bit words (four per iteration), followed by a
 * 16/8/4-byte tail and at most three trailing bytes.
 */
void *memset(void *dest, int c, size_t n)
{
    unsigned char *cursor = dest;
    const unsigned char fill = (unsigned char)c;
    /* Replicate the fill byte into every byte lane of a 32/64-bit word. */
    const uint32_t fill32 = (uint32_t)fill * 0x01010101U;
    const uint64_t fill64 = ((uint64_t)fill32 << 32) | fill32;
    size_t head;

    if (n == 0) {
        return dest;
    }

    /* Bytes needed to reach the next 8-byte boundary (0 when already aligned). */
    head = (size_t)((8 - ((uintptr_t)cursor & 7)) & 7);
    if (head > n) {
        head = n;
    }
    n -= head;
    for (; head != 0; head--) {
        *cursor++ = fill;
    }

    /* 32 bytes per iteration. The cursor is 8-byte aligned whenever n > 0 here. */
    for (; n >= 32; n -= 32, cursor += 32) {
        uint64_t *out = (uint64_t *)cursor;
        out[0] = fill64;
        out[1] = fill64;
        out[2] = fill64;
        out[3] = fill64;
    }

    /* Fewer than 32 bytes remain: one 16-byte, 8-byte and 4-byte step each. */
    if (n >= 16) {
        uint64_t *out = (uint64_t *)cursor;
        out[0] = fill64;
        out[1] = fill64;
        cursor += 16;
        n -= 16;
    }
    if (n >= 8) {
        *(uint64_t *)cursor = fill64;
        cursor += 8;
        n -= 8;
    }
    if (n >= 4) {
        *(uint32_t *)cursor = fill32;
        cursor += 4;
        n -= 4;
    }

    /* At most three bytes are left. */
    for (; n != 0; n--) {
        *cursor++ = fill;
    }

    return dest;
}
|
Loading…
Reference in New Issue