feat:C库典型高频函数优化

针对字符串类高频函数:
1. 采用单次多字节操作提升字符串类高频函数的执行效率
2. 针对armv7-a,采用neon指令进行优化

close: #I42DAK

Signed-off-by: arvinzzz <zhaotianyu9@huawei.com>
Change-Id: Ic90d92f778e0006881f793585264ad7e5f644104
This commit is contained in:
arvinzzz 2021-08-08 13:02:22 +08:00
parent d2fe0e788b
commit 689c2e90cf
6 changed files with 536 additions and 2 deletions

View File

@ -60,6 +60,13 @@ kernel_module(module_name) {
"$OPTRTDIR/string/arm/strcmp.S",
"$OPTRTDIR/string/arm/strcpy.c",
"$OPTRTDIR/string/arm/strlen-armv6t2.S",
"src/arch/arm/memset.S",
"src/arch/arm/memcmp.S",
]
} else {
sources += [
"src/memset.c",
"src/memcmp.c",
]
}

View File

@ -9,6 +9,9 @@ MUSLPORTINGDIR = $(MUSLDIR)/porting/liteos_a/kernel
OPTRTDIR = $(TOPDIR)/third_party/optimized-routines
MUSL_SRCS =
LOCAL_OPT_DIR = src/arch/$(ARCH)
LOCAL_OPT_SRCS =
LOCAL_FILTER_SRCS =
MUSLPORTING_SRCS = \
src/*/$(ARCH)/*.[csS] \
@ -16,6 +19,13 @@ MUSLPORTING_SRCS = \
LOCAL_SRCS = $(wildcard $(addprefix $(MUSLPORTINGDIR)/,$(MUSLPORTING_SRCS)))
LOCAL_SRCS := $(filter-out $(subst $(MUSLPORTINGDIR),$(MUSLDIR),$(LOCAL_SRCS)),$(addprefix $(MUSLDIR)/,$(MUSL_SRCS)) $(LOCAL_SRCS))
LOCAL_SRCS += $(wildcard src/*.c src/*.S)
# Sources optimized for specific architectures.
# $(wildcard) takes whitespace-separated patterns. A comma is not a separator
# there: it would become part of the first pattern ("*.c,"), which matches
# nothing, so optimized .c files would silently be left out.
LOCAL_OPT_SRCS := $(wildcard $(LOCAL_OPT_DIR)/*.c $(LOCAL_OPT_DIR)/*.S)
# Enumerate common src files with the same name as the optimized srcs, both
# under the musl porting tree and under the local src/ directory.
LOCAL_FILTER_SRCS := $(addprefix $(MUSLPORTINGDIR)/src/string/,$(subst $(LOCAL_OPT_DIR)/,,$(LOCAL_OPT_SRCS)))
LOCAL_FILTER_SRCS += $(addprefix src/,$(subst $(LOCAL_OPT_DIR)/,,$(LOCAL_OPT_SRCS)))
# The generic counterparts are C files: map only a trailing .S to .c
# (patsubst works per word on the suffix; subst would rewrite ".S" anywhere).
LOCAL_FILTER_SRCS := $(patsubst %.S,%.c,$(LOCAL_FILTER_SRCS))
ifeq ($(LOSCFG_ARCH_ARM_VER), "armv7-a")
LOCAL_SRCS := $(filter-out $(addprefix $(MUSLPORTINGDIR)/src/string/,memchr.c memcpy.c strcmp.c strcpy.c strlen.c), $(LOCAL_SRCS))
@ -25,10 +35,11 @@ LOCAL_SRCS += \
$(OPTRTDIR)/string/arm/strcmp.S \
$(OPTRTDIR)/string/arm/strcpy.c \
$(OPTRTDIR)/string/arm/strlen-armv6t2.S
# Replace the general srcs of the same name with specially optimized srcs
LOCAL_SRCS += $(LOCAL_OPT_SRCS)
LOCAL_SRCS := $(filter-out $(LOCAL_FILTER_SRCS),$(LOCAL_SRCS))
endif
LOCAL_SRCS += $(wildcard src/*.c src/*/*.c)
LOCAL_INCLUDE := \
-I $(LITEOSTOPDIR)/syscall \
-I $(LITEOSTOPDIR)/bsd/dev/random \

View File

@ -0,0 +1,207 @@
/*
* Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
.syntax unified
.arch armv7-a
.fpu neon
.globl memcmp @ -- Begin function memcmp
.p2align 2
.type memcmp,%function
.code 32 @memcmp
/*
 * int memcmp(const void *str1, const void *str2, size_t count)
 *
 * In:    r0 = str1, r1 = str2, r2 = count
 * Out:   r0 = 0 when the pointers are identical, count is 0, or all compared
 *        bytes match; otherwise (byte from str1) - (byte from str2) for the
 *        first pair that differs, both bytes zero-extended by ldrb.
 * Uses:  r3 and d0-d7 as scratch; r4-r7 are pushed on entry and popped on return.
 *
 * Strategy: compare in chunks of 32, then at most one chunk each of 16, 8 and
 * 4 bytes, then single bytes. A chunk is tested by subtracting the two inputs
 * bytewise and OR-ing the result together; a non-zero OR means some byte in
 * the chunk differs, and the *_diff labels then narrow the position down.
 */
memcmp:
@ r0 = str1
@ r1 = str2
@ r2 = count
.fnstart
push {r4, r5, r6, r7, lr}
pld [r0, #0]
pld [r1, #0]
/**
 * if ((str1 == str2) || (count == 0)) return 0;
 * cmpne only runs when the pointers differ, so Z is set on exit if either
 * condition holds.
 */
cmp r0, r1
cmpne r2, #0
beq Lreturn_0
/**
 * Determine whether the first byte is different, and return the difference
 * straight away if it is. The pointers are not advanced, so the chunked code
 * below compares this byte again.
 */
ldrb r3, [r0] @ r3 = *str1
ldrb r4, [r1] @ r4 = *str2
pld [r0, #64]
pld [r1, #64]
cmp r3, r4
subne r0, r3, r4
bne Lreturn
/**
 * Comparing 32 bytes each time, using the NEON registers to improve efficiency.
 * Loops for as long as count >= 32 (unsigned) and the chunks are equal.
 */
L32_byte_cmp:
cmp r2, #32
blo L16_byte_cmp
sub r2, r2, #32
vld1.8 {d0 - d3}, [r0]!
vld1.8 {d4 - d7}, [r1]!
vsub.i8 q0, q0, q2 @ q0: Difference of the first 16 bytes
vsub.i8 q1, q1, q3 @ q1: Difference of the last 16 bytes
pld [r0, #64]
pld [r1, #64]
@ q2/q3 (d4-d7) have been consumed by the subtractions; d4-d6 are reused as scratch.
vorr d4, d0, d1 @ d4: Save the result of calculating whether the first 16 bytes are equal.
vorr d5, d2, d3 @ d5: Save the result of calculating whether the last 16 bytes are equal.
vorr d6, d4, d5 @ d6: Save the result of 32 bytes calculation whether they are equal.
vmov r3, r4, d6
orr r5, r3, r4
cmp r5, #0
beq L32_byte_cmp
/**
 * Going to the diff branch shows that a certain byte must be different at this time.
 * r3 is set to the OR of both words of d4, so it is zero exactly when the
 * first 16 bytes of the chunk are equal (the difference is then in the last 16).
 */
L32_byte_diff:
vmov r3, r4, d4
orr r3, r3, r4
/**
 * Adjust the two pointers back to the start of the 32-byte chunk
 * (the loads above post-incremented them).
 */
sub r0, #32
sub r1, #32
cmp r3, #0
addeq r0, #16
addeq r1, #16
beq L16_byte_diff_back
@ Difference is in the first 16 bytes: r3:r4 = diff of bytes 0-7, r5:r6 = diff of bytes 8-15.
vmov r3, r4, d0
vmov r5, r6, d1
b L16_byte_diff
@ Difference is in the last 16 bytes (pointers already advanced by 16):
@ r3:r4 = diff of bytes 16-23, r5:r6 = diff of bytes 24-31.
L16_byte_diff_back:
vmov r3, r4, d2
vmov r5, r6, d3
/**
 * Here r0/r1 point at a 16-byte window known to contain a difference,
 * r3:r4 hold the bytewise difference of its first 8 bytes and r5:r6 that of
 * its last 8 bytes. If the first 8 are equal, step over them.
 */
L16_byte_diff:
orr r7, r3, r4
cmp r7, #0
addeq r0, #8
addeq r1, #8
beq L8_byte_diff_back
b L8_byte_diff
@ Difference is in the upper 8 bytes: make r3:r4 describe that half.
L8_byte_diff_back:
mov r3, r5
mov r4, r6
/**
 * Here r0/r1 point at an 8-byte window containing a difference and r3 holds
 * the difference of its first 4 bytes. If those are equal, step over them.
 */
L8_byte_diff:
cmp r3, #0
addeq r0, #4
addeq r1, #4
beq L4_byte_diff @ target is the next instruction, so both outcomes continue below
/**
 * Byte-by-byte scan for the differing byte. There is no length check here:
 * every path into this label has already established that a difference lies
 * in the bytes ahead, so the loop ends at that byte with r5 = *str1 - *str2.
 */
L4_byte_diff:
ldrb r5, [r0], #1
ldrb r6, [r1], #1
subs r5, r5, r6
beq L4_byte_diff
mov r0, r5
b Lreturn
/**
 * The dichotomy handles the case of less than 32 bytes:
 * at most one 16-byte chunk, then one 8-byte chunk, then one 4-byte word,
 * then single bytes.
 */
L16_byte_cmp:
cmp r2, #16
blo L8_byte_cmp
sub r2, r2, #16
vld1.8 {d0 - d1}, [r0]!
vld1.8 {d4 - d5}, [r1]!
vsub.i8 q0, q0, q2
pld [r0, #64]
pld [r1, #64]
vorr d4, d0, d1
vmov r3, r4, d4
orr r3, r3, r4
cmp r3, #0
beq L8_byte_cmp
@ Mismatch inside these 16 bytes: rewind and hand r3:r4 / r5:r6 to the locator.
sub r0, #16
sub r1, #16
vmov r3, r4, d0
vmov r5, r6, d1
b L16_byte_diff
L8_byte_cmp:
cmp r2, #8
blo L4_byte_cmp
sub r2, r2, #8
vld1.8 {d0}, [r0]!
vld1.8 {d4}, [r1]!
vsub.i8 d0, d0, d4
vmov r3, r4, d0
orr r7, r3, r4
cmp r7, #0
beq L4_byte_cmp
@ Mismatch inside these 8 bytes: rewind; r3 already holds the diff of the first 4.
sub r0, #8
sub r1, #8
b L8_byte_diff
@ NOTE(review): the ldr pair below loads words from whatever addresses the
@ caller passed, with no alignment check in this function; this presumably
@ relies on unaligned word access being permitted on the target - confirm.
L4_byte_cmp:
cmp r2, #4
blo Lless_4_byte_cmp
sub r2, r2, #4
ldr r3, [r0], #4
ldr r4, [r1], #4
cmp r3, r4
beq Lless_4_byte_cmp
@ Words differ: rewind and let the byte scan find the exact position.
sub r0, #4
sub r1, #4
b L4_byte_diff
/**
 * Counted byte loop for the remaining tail. Returns the difference of the
 * first mismatching pair, or 0 once count reaches zero.
 */
Lless_4_byte_cmp:
cmp r2, #0
beq Lreturn_0
sub r2, r2, #1
ldrb r3, [r0], #1
ldrb r4, [r1], #1
sub r5, r3, r4
cmp r5, #0
movne r0, r5
bne Lreturn
b Lless_4_byte_cmp
Lreturn_0:
mov r0, #0
@ Restore r4-r7 and return by popping the saved lr into pc.
Lreturn:
pop {r4, r5, r6, r7, pc}
Lfunc_end:
.size memcmp, Lfunc_end - memcmp
.cantunwind
.fnend @ -- End function

View File

@ -0,0 +1,128 @@
/*
* Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
.syntax unified
.arch armv7-a
.fpu neon
.globl memset @ -- Begin function memset
.p2align 2
.type memset,%function
/*
 * void *memset(void *dest, int c, size_t n)
 *
 * In:    r0 = dest, r1 = c (only the low byte is stored), r2 = n
 * Out:   r0 = dest (r0 is never written; r4 is the running store pointer)
 * Uses:  r3 and q0-q3 (d0-d7) as scratch; r4 is saved and restored on the stack.
 *
 * Strategy: store single bytes until the pointer is 8-byte aligned, then
 * 64 bytes per iteration from d0-d7, then at most one store each of 32, 16,
 * 8 and 4 bytes, then the last 0-3 bytes one at a time.
 */
memset:
@ r0 = address
@ r1 = char
@ r2 = count
@ returns original address in r0
    .fnstart
    push    {r4}
    cmp     r2, #0
    beq     Lreturn                 @ count == 0: nothing to store
    vdup.8  q0, r1                  @ q0 = 16 copies of the fill byte
    mov     r4, r0                  @ r4 = r0 = address
/**
 * Head: bring the store pointer up to an 8-byte boundary (despite the label
 * name, the alignment established here is 8 bytes), without ever storing
 * more than count bytes.
 */
L64_byte_alignment:
    ands    r3, r0, #7
    beq     L64_byte_aligned
    rsb     r3, r3, #8              @ r3 = unalignedCnt = 8 - (address & 7)
    cmp     r2, r3
    movlo   r3, r2                  @ r3 = min(unalignedCnt, count)
    sub     r2, r2, r3
Lloop1:
    strb    r1, [r4], #1
    subs    r3, r3, #1
    bgt     Lloop1
/**
 * Set 64 bytes each time, and use the NEON registers to improve efficiency.
 */
L64_byte_aligned:
    vmov    q1, q0
    vmov    q2, q0
    cmp     r2, #64
    blo     L32_byte_aligned
    vmov    q3, q0
    sub     r2, r2, #64             @ bias: r2 = remaining - 64 while inside the loop
Lloop2:
    vstmia  r4!, {d0 - d7}
    subs    r2, r2, #64
    /*
     * Unsigned test on the carry from subs: keep looping while at least 64
     * bytes were still left before the subtraction. A signed "greater than"
     * here would leave the loop early when exactly 64 bytes remain (pushing
     * them through the whole tail below) and would misread counts with the
     * top bit set as negative.
     */
    bhs     Lloop2
/**
 * The dichotomy handles the case of less than 64 bytes,
 * and the front will subtract 64 more, and you need to make it up at this time.
 */
    add     r2, r2, #64
L32_byte_aligned:
    cmp     r2, #0
    beq     Lreturn
    cmp     r2, #32
    blo     L16_byte_aligned
    sub     r2, r2, #32
    vstmia  r4!, {d0 - d3}
L16_byte_aligned:
    cmp     r2, #0
    beq     Lreturn
    cmp     r2, #16
    blo     L8_byte_aligned
    sub     r2, r2, #16
    vstmia  r4!, {d0 - d1}
L8_byte_aligned:
    cmp     r2, #0
    beq     Lreturn
    cmp     r2, #8
    blo     L4_byte_aligned
    sub     r2, r2, #8
    vstmia  r4!, {d0}
L4_byte_aligned:
    cmp     r2, #0
    beq     Lreturn
    cmp     r2, #4
    blo     Lless_4_byte
    sub     r2, r2, #4
    vst1.32 {d0[0]}, [r4]!          @ store the low 32 bits of d0 (4 fill bytes)
/**
 * Tail: fewer than 4 bytes left, stored one at a time.
 */
Lless_4_byte:
    cmp     r2, #0
    beq     Lreturn
    strb    r1, [r4], #1
    sub     r2, r2, #1
    b       Lless_4_byte
Lreturn:
    pop     {r4}
    bx      lr
Lfunc_end:
    .size memset, Lfunc_end - memset
    .cantunwind
    .fnend @ -- End function

74
lib/libc/src/memcmp.c Normal file
View File

@ -0,0 +1,74 @@
/*
* Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include <stdint.h>
/*
 * Compare the first n bytes of str1 and str2 as unsigned chars.
 *
 * Returns 0 when the two ranges are equal (or n is 0); otherwise the value
 * (*s1 - *s2) of the first differing byte pair, so the sign tells which
 * range orders first.
 *
 * Eight bytes are compared per step only while both pointers sit on an
 * 8-byte boundary. Casting an arbitrarily aligned byte pointer to uint64_t*
 * and dereferencing it is undefined behaviour and traps on targets that
 * enforce alignment, so mutually misaligned inputs use the byte loop.
 * NOTE(review): the wide loads still type-pun through uint64_t as before;
 * confirm the build does not rely on strict-aliasing optimisations here.
 */
int memcmp(const void *str1, const void *str2, size_t n)
{
    const unsigned char *s1 = str1;
    const unsigned char *s2 = str2;
    const uintptr_t wordMask = sizeof(uint64_t) - 1;

    /* Both pointers can reach an 8-byte boundary together only if they are
     * misaligned by the same amount. */
    if ((((uintptr_t)s1 ^ (uintptr_t)s2) & wordMask) == 0) {
        /* Head: compare single bytes up to the shared boundary. */
        while (n && ((uintptr_t)s1 & wordMask)) {
            if (*s1 != *s2) {
                return *s1 - *s2;
            }
            s1++;
            s2++;
            n--;
        }
        /* Body: skip over equal 8-byte words. A differing word is left in
         * place for the byte loop below to locate the exact byte. */
        while (n >= sizeof(uint64_t) &&
               *(const uint64_t *)(s1) == *(const uint64_t *)(s2)) {
            s1 += sizeof(uint64_t);
            s2 += sizeof(uint64_t);
            n -= sizeof(uint64_t);
        }
    }

    /* Tail (and the whole range for misaligned inputs): byte by byte. */
    for (; n && (*s1 == *s2); n--, s1++, s2++);
    return n ? *s1 - *s2 : 0;
}

107
lib/libc/src/memset.c Normal file
View File

@ -0,0 +1,107 @@
/*
* Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
/*
 * Fill the first n bytes at dest with the low 8 bits of c and return dest.
 *
 * Single bytes are written until the cursor reaches an 8-byte boundary; the
 * bulk is then written as 64-bit words (four per iteration), followed by at
 * most one 16-, 8- and 4-byte step and finally the last 0-3 bytes.
 */
void *memset(void *dest, int c, size_t n)
{
    unsigned char *cur = dest;
    const unsigned char fill = (unsigned char)(c & 0xFF);
    /* Replicate the fill byte across 32 and 64 bits. */
    const uint32_t fill32 = (uint32_t)fill * 0x01010101u;
    const uint64_t fill64 = ((uint64_t)fill32 << 32) | fill32;
    size_t head;

    /* Head: bytes needed to reach the next 8-byte boundary (0 if already
     * there), capped at n. */
    head = (size_t)(-(uintptr_t)cur & 7u);
    if (head > n) {
        head = n;
    }
    n -= head;
    while (head != 0) {
        *cur++ = fill;
        head--;
    }

    /* Bulk: 32 bytes per pass, as four aligned 64-bit stores. */
    for (; n >= 32; n -= 32, cur += 32) {
        uint64_t *w = (uint64_t *)cur;
        w[0] = fill64;
        w[1] = fill64;
        w[2] = fill64;
        w[3] = fill64;
    }

    /* Remainder is below 32 here, so each power-of-two step runs at most once
     * and testing the matching bit of n equals testing n >= step. */
    if (n & 16) {
        uint64_t *w = (uint64_t *)cur;
        w[0] = fill64;
        w[1] = fill64;
        cur += 16;
    }
    if (n & 8) {
        *(uint64_t *)cur = fill64;
        cur += 8;
    }
    if (n & 4) {
        *(uint32_t *)cur = fill32;
        cur += 4;
    }

    /* Tail: the last 0-3 bytes. */
    for (n &= 3; n != 0; n--) {
        *cur++ = fill;
    }
    return dest;
}