diff --git a/base/glibc/glibc.conf b/base/glibc/glibc.conf index 19b8379b3..122d1c00c 100644 --- a/base/glibc/glibc.conf +++ b/base/glibc/glibc.conf @@ -2,7 +2,7 @@ # This copyright note is auto-generated by ./scripts/Create-CopyPatch. # # Filename: package/.../glibc/glibc.conf -# Copyright (C) 2006 - 2008 The OpenSDE Project +# Copyright (C) 2006 - 2009 The OpenSDE Project # Copyright (C) 2004 - 2006 The T2 SDE Project # # More information can be found in the files COPYING and README. @@ -33,6 +33,8 @@ var_insert GCC_WRAPPER_INSERT " " "-O" # glibc does not like -ffast-math var_insert GCC_WRAPPER_REMOVE " " "-ffast-math" +var_append confopt " " "--enable-kernel=2.6.18" + # Disable glibc internal debuging but build debuging and profile # code so we can use this glibc to debug other programs. pkg_glibc_preconf(){ @@ -95,7 +97,7 @@ glibc_prepatch() { local addon= addons="ports" [ "$SDECFG_PKG_GLIBC_LIBIDN" != '1' ] || var_append addons ' ' 'libidn' - [ "$SDECFG_PKG_GLIBC_TLS" = 1 ] || var_append addons ' ' 'linuxthreads' + ##[ "$SDECFG_PKG_GLIBC_TLS" = 1 ] || var_append addons ' ' 'linuxthreads' for addon in $addons; do tarball=$( match_source_file -p $addon ) diff --git a/base/glibc/glibc.desc b/base/glibc/glibc.desc index 460a600a9..13faf4898 100644 --- a/base/glibc/glibc.desc +++ b/base/glibc/glibc.desc @@ -1,9 +1,8 @@ - [COPY] --- SDE-COPYRIGHT-NOTE-BEGIN --- [COPY] This copyright note is auto-generated by ./scripts/Create-CopyPatch. [COPY] [COPY] Filename: package/.../glibc/glibc.desc -[COPY] Copyright (C) 2006 - 2007 The OpenSDE Project +[COPY] Copyright (C) 2006 - 2009 The OpenSDE Project [COPY] Copyright (C) 2004 - 2006 The T2 SDE Project [COPY] Copyright (C) 1998 - 2003 Clifford Wolf [COPY] @@ -34,14 +33,9 @@ [L] GPL [S] Stable -[V] 2.5.1 +[V] 2.9 [P] X 01---5---9 101.600 -[D] 594994693 glibc-2.5.1.tar.bz2 http://ftp.gnu.org/gnu/glibc/ -[D] 3516243427 glibc-libidn-2.5.1.tar.gz http://ftp.gnu.org/gnu/glibc/ -[D] 4254376675 glibc-linuxthreads-2.5.tar.bz2 http://ftp.gnu.org/gnu/glibc/ -[D] 4254948014 glibc-ports-2.5.tar.bz2 http://ftp.gnu.org/gnu/glibc/ - -#[D] 1589019524 glibc-20060605.tar.bz2 ftp://sources.redhat.com/pub/glibc/snapshots/ -#[D] X glibc-2.3.4-2004-12-24.tar.bz2 cvs://:pserver:anoncvs@sources.redhat.com:/cvs/glibc libc -r glibc-2_3-branch -D 2004-12-24 - +[D] 3110082802 glibc-2.9.tar.bz2 http://ftp.gnu.org/gnu/glibc/ +[D] 1001729082 glibc-libidn-2.9.tar.gz http://ftp.gnu.org/gnu/glibc/ +[D] 1975249228 glibc-ports-2.9.tar.bz2 http://ftp.gnu.org/gnu/glibc/ diff --git a/base/glibc/nptl-force-checks.patch.cross b/base/glibc/nptl-force-checks.patch.cross deleted file mode 100644 index 48bc64097..000000000 --- a/base/glibc/nptl-force-checks.patch.cross +++ /dev/null @@ -1,42 +0,0 @@ -# --- SDE-COPYRIGHT-NOTE-BEGIN --- -# This copyright note is auto-generated by ./scripts/Create-CopyPatch. -# -# Filename: package/.../glibc/nptl-force-checks.patch.cross -# Copyright (C) 2004 - 2006 The T2 SDE Project -# -# More information can be found in the files COPYING and README. -# -# This patch file is dual-licensed. It is available under the license the -# patched project is licensed under, as long as it is an OpenSource license -# as defined at http://www.opensource.org/ (e.g. BSD, X11) or under the terms -# of the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any later -# version. -# --- SDE-COPYRIGHT-NOTE-END --- - -Yet another ugly glibc patch for the ugly glibc configure stuff. We -force the nptl checks to succeed during the initial cross tool build, -because without libc the compiler can not link programs that early ... - - - Rene Rebe - ---- libc/nptl/sysdeps/pthread/configure.orig 2003-12-03 07:50:01.000000000 +0100 -+++ libc/nptl/sysdeps/pthread/configure 2004-10-31 00:02:50.526362392 +0200 -@@ -74,7 +74,7 @@ - echo "$as_me: failed program was:" >&5 - sed 's/^/| /' conftest.$ac_ext >&5 - --libc_cv_forced_unwind=no -+libc_cv_forced_unwind=yes - fi - rm -f conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -@@ -139,7 +139,7 @@ - echo "$as_me: failed program was:" >&5 - sed 's/^/| /' conftest.$ac_ext >&5 - --libc_cv_c_cleanup=no -+libc_cv_c_cleanup=yes - fi - rm -f conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext diff --git a/base/glibc/x86_64-string.patch b/base/glibc/x86_64-string.patch deleted file mode 100644 index 7c30aa591..000000000 --- a/base/glibc/x86_64-string.patch +++ /dev/null @@ -1,2937 +0,0 @@ -# --- SDE-COPYRIGHT-NOTE-BEGIN --- -# This copyright note is auto-generated by ./scripts/Create-CopyPatch. -# -# Filename: package/.../glibc/x86_64-string.patch -# Copyright (C) 2006 The T2 SDE Project -# -# More information can be found in the files COPYING and README. -# -# This patch file is dual-licensed. It is available under the license the -# patched project is licensed under, as long as it is an OpenSource license -# as defined at http://www.opensource.org/ (e.g. BSD, X11) or under the terms -# of the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any later -# version. -# --- SDE-COPYRIGHT-NOTE-END --- - -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c libc/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c ---- libc/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c 2005-12-14 02:09:28.000000000 -0600 -+++ libc/sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c 2006-04-14 16:38:16.819949000 -0500 -@@ -1,5 +1,5 @@ - #ifdef IS_IN_ldconfig - # include - #else --# include -+# include - #endif -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/bzero.S libc/sysdeps/x86_64/bzero.S ---- libc/sysdeps/x86_64/bzero.S 2002-08-31 12:30:07.000000000 -0500 -+++ libc/sysdeps/x86_64/bzero.S 2006-05-05 15:23:27.884691000 -0500 -@@ -1,3 +1,5 @@ -+#define USE_AS_BZERO - #define memset __bzero - #include -+ - weak_alias (__bzero, bzero) -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/dl-machine.h libc/sysdeps/x86_64/dl-machine.h ---- libc/sysdeps/x86_64/dl-machine.h 2005-07-31 12:49:44.000000000 -0500 -+++ libc/sysdeps/x86_64/dl-machine.h 2006-05-09 15:17:03.570496000 -0500 -@@ -1,4 +1,5 @@ --/* Machine-dependent ELF dynamic relocation inline functions. x86-64 version. -+/* Machine-dependent ELF dynamic relocation inline functions (x86-64 version). -+ - Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Andreas Jaeger . -@@ -219,6 +220,53 @@ dl_platform_init (void) - if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') - /* Avoid an empty string which would disturb us. */ - GLRO(dl_platform) = NULL; -+ -+ asm volatile -+ ( -+ "mov $0x80000000, %%eax # get highest level of support \n\t" -+ "cpuid \n\t" -+ "cmp $0x80000006, %%eax # check for L2 info support \n\t" -+ "jb 1f \n\t" -+ "xor %%eax, %%eax # get manufacturer string \n\t" -+ "cpuid \n\t" -+ "cmp $0x68747541, %%ebx # check for 'Auth'... \n\t" -+ "jne 4f \n\t" -+ "cmp $0x69746e65, %%edx # 'enti'... \n\t" -+ "jne 4f \n\t" -+ "cmp $0x444d4163, %%ecx # 'cAMD' \n\t" -+ "je 2f \n\t" -+ "4: \n\t" -+ "cmp $0x756e6547, %%ebx # check for 'Genu'... \n\t" -+ "jne 1f \n\t" -+ "cmp $0x49656e69, %%edx # 'ineI'... \n\t" -+ "jne 1f \n\t" -+ "cmp $0x6c65746e, %%ecx # 'ntel' \n\t" -+ "je 3f \n\t" -+ "jmp 1f \n\t" -+ "2: # AMD \n\t" -+ "mov $0x80000001, %%eax # get features support\n\t" -+ "cpuid \n\t" -+ "test $1 << 31, %%edx # check for 3DNow! support\n\t" -+ "setnzb %2 \n\t" -+ "mov $0x80000005, %%eax # get L1 info for AMD\n\t" -+ "cpuid \n\t" -+ "shr $24, %%ecx \n\t" -+ "shl $10, %%ecx # convert from KB to B \n\t" -+ "mov %%rcx, %0 \n\t" -+ "3: # AMD, Intel \n\t" -+ "mov $0x80000006, %%eax # get L2 info\n\t" -+ "cpuid \n\t" -+ "shr $16, %%ecx \n\t" -+ "shl $10, %%ecx # convert from KB to B \n\t" -+ "mov %%rcx, %1 \n\t" -+ "1: # other manufacturers\n\t" -+ : "=m" (GLRO (dl_cache1size)), "=m" (GLRO (dl_cache2size)), "=m" (GLRO (dl_prefetchw)) -+ : -+ : "%rax", "%rbx", "%rcx", "%rdx", "cc" -+ ); -+ -+ GLRO (dl_cache1sizehalf) = GLRO (dl_cache1size) / 2; -+ GLRO (dl_cache2sizehalf) = GLRO (dl_cache2size) / 2; - } - - static inline Elf64_Addr -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/dl-procinfo.c libc/sysdeps/x86_64/dl-procinfo.c ---- libc/sysdeps/x86_64/dl-procinfo.c 1969-12-31 18:00:00.000000000 -0600 -+++ libc/sysdeps/x86_64/dl-procinfo.c 2006-05-05 16:23:38.030341000 -0500 -@@ -0,0 +1,123 @@ -+/* Data for x86-64 version of processor capability information. -+ Copyright (C) 2004 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ Contributed by Andreas Jaeger , 2004. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, write to the Free -+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ 02111-1307 USA. */ -+ -+/* This information must be kept in sync with the _DL_HWCAP_COUNT and -+ _DL_PLATFORM_COUNT definitions in procinfo.h. -+ -+ If anything should be added here check whether the size of each string -+ is still ok with the given array size. -+ -+ All the #ifdefs in the definitions ar equite irritating but -+ necessary if we want to avoid duplicating the information. There -+ are three different modes: -+ -+ - PROCINFO_DECL is defined. This means we are only interested in -+ declarations. -+ -+ - PROCINFO_DECL is not defined: -+ -+ + if SHARED is defined the file is included in an array -+ initializer. The .element = { ... } syntax is needed. -+ -+ + if SHARED is not defined a normal array initialization is -+ needed. -+ */ -+ -+#ifndef PROCINFO_CLASS -+#define PROCINFO_CLASS -+#endif -+ -+ /* _dl_cache1size: size of L1 cache */ -+#if !defined PROCINFO_DECL && defined SHARED -+ ._dl_cache1size -+#else -+PROCINFO_CLASS long int _dl_cache1size -+#endif -+#ifndef PROCINFO_DECL -+= 1024 * 32 /* defaults to 32 */ -+#endif -+#if !defined SHARED || defined PROCINFO_DECL -+; -+#else -+, -+#endif -+ -+ /* _dl_cache1sizehalf: 1/2 size of L1 cache */ -+#if !defined PROCINFO_DECL && defined SHARED -+ ._dl_cache1sizehalf -+#else -+PROCINFO_CLASS long int _dl_cache1sizehalf -+#endif -+#ifndef PROCINFO_DECL -+= 1024 * 32 / 2 /* defaults to 16k */ -+#endif -+#if !defined SHARED || defined PROCINFO_DECL -+; -+#else -+, -+#endif -+ -+ /* _dl_cache2size: size of L2 cache */ -+#if !defined PROCINFO_DECL && defined SHARED -+ ._dl_cache2size -+#else -+PROCINFO_CLASS long int _dl_cache2size -+#endif -+#ifndef PROCINFO_DECL -+= 1024 * 1024 /* defaults to 1M */ -+#endif -+#if !defined SHARED || defined PROCINFO_DECL -+; -+#else -+, -+#endif -+ -+ /* _dl_cache2sizehalf: 1/2 size of L2 cache */ -+#if !defined PROCINFO_DECL && defined SHARED -+ ._dl_cache2sizehalf -+#else -+PROCINFO_CLASS long int _dl_cache2sizehalf -+#endif -+#ifndef PROCINFO_DECL -+= 1024 * 1024 / 2 /* defaults to 512k */ -+#endif -+#if !defined SHARED || defined PROCINFO_DECL -+; -+#else -+, -+#endif -+ -+ /* _dl_prefetchw: prefetchw supported */ -+#if !defined PROCINFO_DECL && defined SHARED -+ ._dl_prefetchw -+#else -+PROCINFO_CLASS int _dl_prefetchw -+#endif -+#ifndef PROCINFO_DECL -+= 0 /* defaults to no */ -+#endif -+#if !defined SHARED || defined PROCINFO_DECL -+; -+#else -+, -+#endif -+ -+#undef PROCINFO_DECL -+#undef PROCINFO_CLASS -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/elf/rtld-global-offsets.sym libc/sysdeps/x86_64/elf/rtld-global-offsets.sym ---- libc/sysdeps/x86_64/elf/rtld-global-offsets.sym 1969-12-31 18:00:00.000000000 -0600 -+++ libc/sysdeps/x86_64/elf/rtld-global-offsets.sym 2006-04-18 14:46:40.056693000 -0500 -@@ -0,0 +1,11 @@ -+#define SHARED 1 -+ -+#include -+ -+#define rtdl_global_offsetof(mem) offsetof (struct rtld_global_ro, mem) -+ -+RTLD_GLOBAL_DL_CACHE1SIZE rtdl_global_offsetof (_dl_cache1size) -+RTLD_GLOBAL_DL_CACHE1SIZEHALF rtdl_global_offsetof (_dl_cache1sizehalf) -+RTLD_GLOBAL_DL_CACHE2SIZE rtdl_global_offsetof (_dl_cache2size) -+RTLD_GLOBAL_DL_CACHE2SIZEHALF rtdl_global_offsetof (_dl_cache2sizehalf) -+RTLD_GLOBAL_DL_PREFETCHW rtdl_global_offsetof (_dl_prefetchw) -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/Makefile libc/sysdeps/x86_64/Makefile ---- libc/sysdeps/x86_64/Makefile 2004-08-16 01:46:14.000000000 -0500 -+++ libc/sysdeps/x86_64/Makefile 2006-04-14 16:38:16.802950000 -0500 -@@ -4,6 +4,9 @@ long-double-fcts = yes - ifeq ($(subdir),csu) - sysdep_routines += hp-timing - elide-routines.os += hp-timing -+ -+# get offset to rtld_global._dl_* -+gen-as-const-headers += rtld-global-offsets.sym - endif - - ifeq ($(subdir),gmon) -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/memcmp.S libc/sysdeps/x86_64/memcmp.S ---- libc/sysdeps/x86_64/memcmp.S 1969-12-31 18:00:00.000000000 -0600 -+++ libc/sysdeps/x86_64/memcmp.S 2006-05-18 14:43:07.611277000 -0500 -@@ -0,0 +1,328 @@ -+# (c) 2002 Advanced Micro Devices, Inc. -+# YOUR USE OF THIS CODE IS SUBJECT TO THE TERMS -+# AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC -+# LICENSE FOUND IN THE "README" FILE THAT IS -+# INCLUDED WITH THIS FILE -+ -+#include "sysdep.h" -+#if defined PIC && defined SHARED -+# include -+#endif -+ -+#if defined PIC && defined SHARED -+ .globl _rtld_local_ro -+ .hidden _rtld_local_ro -+ .set _rtld_local_ro,_rtld_global_ro -+#endif -+ -+ .text -+ -+ENTRY (memcmp) # (const void *, const void*, size_t) -+ -+L(try1): # up to 8B -+ cmp $8, %rdx -+ jae L(1after) -+ -+L(1): # 1-byte -+ test %rdx, %rdx -+ mov $0, %eax -+ jz L(exit) -+ -+L(1loop): -+ movzbl (%rdi), %eax -+ movzbl (%rsi), %ecx -+ sub %ecx, %eax -+ jnz L(exit) -+ -+ dec %rdx -+ -+ lea 1 (%rdi), %rdi -+ lea 1 (%rsi), %rsi -+ -+ jnz L(1loop) -+ -+L(exit): -+ rep -+ ret -+ -+ .p2align 4 -+ -+L(1after): -+ -+L(8try): # up to 32B -+ cmp $32, %rdx -+ jae L(8after) -+ -+L(8): # 8-byte -+ mov %edx, %ecx -+ shr $3, %ecx -+ jz L(1) -+ -+ .p2align 4 -+ -+L(8loop): -+ mov (%rsi), %rax -+ cmp (%rdi), %rax -+ jne L(1) -+ -+ sub $8, %rdx -+ dec %ecx -+ -+ lea 8 (%rsi), %rsi -+ lea 8 (%rdi), %rdi -+ -+ jnz L(8loop) -+ -+L(8skip): -+ and $7, %edx -+ jnz L(1) -+ -+ xor %eax, %eax -+ ret -+ -+ .p2align 4 -+ -+L(8after): -+ -+L(32try): # up to 2KB -+ cmp $2048, %rdx -+ ja L(32after) -+ -+L(32): # 32-byte -+ mov %edx, %ecx -+ shr $5, %ecx -+ jz L(8) -+ -+ .p2align 4 -+ -+L(32loop): -+ mov (%rsi), %rax -+ mov 8 (%rsi), %r8 -+ mov 16 (%rsi), %r9 -+ mov 24 (%rsi), %r10 -+ sub (%rdi), %rax -+ sub 8 (%rdi), %r8 -+ sub 16 (%rdi), %r9 -+ sub 24 (%rdi), %r10 -+ -+ or %rax, %r8 -+ or %r9, %r10 -+ or %r8, %r10 -+ jnz L(8) -+ -+ sub $32, %rdx -+ dec %ecx -+ -+ lea 32 (%rsi), %rsi -+ lea 32 (%rdi), %rdi -+ -+ jnz L(32loop) -+ -+L(32skip): -+ and $31, %edx -+ jnz L(8) -+ -+ xor %eax, %eax -+ ret -+ -+ .p2align 4 -+ -+L(32after): -+ -+L(srctry): -+ mov %esi, %r8d # align by source -+ -+ and $7, %r8d -+ jz L(srcafter) # not unaligned -+ -+L(src): # align -+ lea -8 (%r8, %rdx), %rdx -+ sub $8, %r8d -+ -+# .p2align 4 -+ -+L(srcloop): -+ movzbl (%rdi), %eax -+ movzbl (%rsi), %ecx -+ sub %ecx, %eax -+ jnz L(exit) -+ -+ inc %r8d -+ -+ lea 1 (%rdi), %rdi -+ lea 1 (%rsi), %rsi -+ -+ jnz L(srcloop) -+ -+ .p2align 4 -+ -+L(srcafter): -+ -+L(64try): # up to 1/2 L1 -+#ifdef PIC -+# ifdef SHARED -+ mov _rtld_local_ro@GOTPCREL (%rip), %rcx -+ mov RTLD_GLOBAL_DL_CACHE1SIZEHALF (%rcx), %rcx -+# else -+ mov _dl_cache1sizehalf@GOTPCREL (%rip), %rcx -+ mov (%rcx), %rcx -+# endif -+#else -+ mov _dl_cache1sizehalf, %rcx -+#endif -+ cmp %rdx, %rcx -+ cmova %rdx, %rcx -+ -+L(64): # 64-byte -+ shr $6, %rcx -+ jz L(32) -+ -+ .p2align 4 -+ -+L(64loop): -+ mov (%rsi), %rax -+ mov 8 (%rsi), %r8 -+ sub (%rdi), %rax -+ sub 8 (%rdi), %r8 -+ or %r8, %rax -+ -+ mov 16 (%rsi), %r9 -+ mov 24 (%rsi), %r10 -+ sub 16 (%rdi), %r9 -+ sub 24 (%rdi), %r10 -+ or %r10, %r9 -+ -+ or %r9, %rax -+ jnz L(32) -+ -+ mov 32 (%rsi), %rax -+ mov 40 (%rsi), %r8 -+ sub 32 (%rdi), %rax -+ sub 40 (%rdi), %r8 -+ or %r8, %rax -+ -+ mov 48 (%rsi), %r9 -+ mov 56 (%rsi), %r10 -+ sub 48 (%rdi), %r9 -+ sub 56 (%rdi), %r10 -+ or %r10, %r9 -+ -+ or %r9, %rax -+ jnz L(32) -+ -+ lea 64 (%rsi), %rsi -+ lea 64 (%rdi), %rdi -+ -+ sub $64, %rdx -+ dec %rcx -+ jnz L(64loop) -+ -+# .p2align 4 -+ -+L(64skip): -+ cmp $2048, %rdx -+ ja L(64after) -+ -+ test %edx, %edx -+ jnz L(32) -+ -+ xor %eax, %eax -+ ret -+ -+ .p2align 4 -+ -+L(64after): -+ -+L(128try): -+ -+L(128): # 128-byte -+ mov %rdx, %rcx -+ shr $7, %rcx -+ jz L(128skip) -+ -+ .p2align 4 -+ -+L(128loop): -+ prefetcht0 512 (%rsi) -+ prefetcht0 512 (%rdi) -+ -+ mov (%rsi), %rax -+ mov 8 (%rsi), %r8 -+ sub (%rdi), %rax -+ sub 8 (%rdi), %r8 -+ mov 16 (%rsi), %r9 -+ mov 24 (%rsi), %r10 -+ sub 16 (%rdi), %r9 -+ sub 24 (%rdi), %r10 -+ -+ or %r8, %rax -+ or %r9, %r10 -+ or %r10, %rax -+ -+ mov 32 (%rsi), %r8 -+ mov 40 (%rsi), %r9 -+ sub 32 (%rdi), %r8 -+ sub 40 (%rdi), %r9 -+ mov 48 (%rsi), %r10 -+ mov 56 (%rsi), %r11 -+ sub 48 (%rdi), %r10 -+ sub 56 (%rdi), %r11 -+ -+ or %r9, %r8 -+ or %r11, %r10 -+ or %r10, %r8 -+ -+ or %r8, %rax -+ jnz L(32) -+ -+ prefetcht0 576 (%rsi) -+ prefetcht0 576 (%rdi) -+ -+ mov 64 (%rsi), %rax -+ mov 72 (%rsi), %r8 -+ sub 64 (%rdi), %rax -+ sub 72 (%rdi), %r8 -+ mov 80 (%rsi), %r9 -+ mov 88 (%rsi), %r10 -+ sub 80 (%rdi), %r9 -+ sub 88 (%rdi), %r10 -+ -+ or %r8, %rax -+ or %r9, %r10 -+ or %r10, %rax -+ -+ mov 96 (%rsi), %r8 -+ mov 104 (%rsi), %r9 -+ sub 96 (%rdi), %r8 -+ sub 104 (%rdi), %r9 -+ mov 112 (%rsi), %r10 -+ mov 120 (%rsi), %r11 -+ sub 112 (%rdi), %r10 -+ sub 120 (%rdi), %r11 -+ -+ or %r9, %r8 -+ or %r11, %r10 -+ or %r10, %r8 -+ -+ or %r8, %rax -+ jnz L(32) -+ -+ sub $128, %rdx -+ dec %rcx -+ -+ lea 128 (%rsi), %rsi -+ lea 128 (%rdi), %rdi -+ -+ jnz L(128loop) -+ -+L(128skip): -+ and $127, %edx -+ jnz L(32) -+ -+ xor %eax, %eax -+ ret -+ -+END (memcmp) -+ -+#undef bcmp -+weak_alias (memcmp, bcmp) -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/memcpy.S libc/sysdeps/x86_64/memcpy.S ---- libc/sysdeps/x86_64/memcpy.S 2004-10-17 23:17:08.000000000 -0500 -+++ libc/sysdeps/x86_64/memcpy.S 2006-05-18 15:23:45.311446000 -0500 -@@ -1,32 +1,22 @@ --/* Highly optimized version for x86-64. -- Copyright (C) 1997, 2000, 2002, 2003, 2004 Free Software Foundation, Inc. -- This file is part of the GNU C Library. -- Based on i586 version contributed by Ulrich Drepper , 1997. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -+# (c) 2002 Advanced Micro Devices, Inc. -+# YOUR USE OF THIS CODE IS SUBJECT TO THE TERMS -+# AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC -+# LICENSE FOUND IN THE "README" FILE THAT IS -+# INCLUDED WITH THIS FILE - - #include - #include "asm-syntax.h" - #include "bp-sym.h" - #include "bp-asm.h" -+#if defined PIC && defined SHARED -+# include -+#endif - --/* BEWARE: `#ifdef memcpy' means that memcpy is redefined as `mempcpy', -- and the return value is the byte after the last one copied in -- the destination. */ --#define MEMPCPY_P (defined memcpy) -+#if defined PIC && defined SHARED -+ .globl _rtld_local_ro -+ .hidden _rtld_local_ro -+ .set _rtld_local_ro,_rtld_global_ro -+#endif - - .text - #if defined PIC && !defined NOT_IN_libc -@@ -35,67 +25,480 @@ ENTRY (__memcpy_chk) - jb HIDDEN_JUMPTARGET (__chk_fail) - END (__memcpy_chk) - #endif --ENTRY (BP_SYM (memcpy)) -- /* Cutoff for the big loop is a size of 32 bytes since otherwise -- the loop will never be entered. */ -- cmpq $32, %rdx -- movq %rdx, %rcx --#if !MEMPCPY_P -- movq %rdi, %r10 /* Save value. */ -+ -+ENTRY (memcpy) # (void *, const void*, size_t) -+ -+L(1try): # up to 16B -+ cmp $16, %rdx -+#if defined (USE_AS_MEMPCPY) -+ lea (%rdi, %rdx), %rax -+#else -+ mov %rdi, %rax - #endif -+ jae L(1after) -+ -+L(1): # 1-byte once -+ test $1, %dl -+ jz L(1a) -+ -+ movzbl (%rsi), %ecx -+ mov %cl, (%rdi) -+ -+ inc %rsi -+ inc %rdi -+ -+L(1a): # 2-byte once -+ test $2, %dl -+ jz L(1b) -+ -+ movzwl (%rsi), %ecx -+ mov %cx, (%rdi) -+ -+ add $2, %rsi -+ add $2, %rdi -+ -+L(1b): # 4-byte once -+ test $4, %dl -+ jz L(1c) -+ -+ mov (%rsi), %ecx -+ mov %ecx, (%rdi) -+ -+ add $4, %rsi -+ add $4, %rdi -+ -+L(1c): # 8-byte once -+ test $8, %dl -+ jz L(exit) -+ -+ mov (%rsi), %rcx -+ mov %rcx, (%rdi) -+ -+L(exit): -+ rep -+ ret -+ -+ .p2align 4 -+ -+L(1after): -+ push %rax -+ -+L(8try): # up to 32B -+ cmp $32, %rdx -+ jae L(8after) -+ -+L(8): # 8-byte loop -+ mov %edx, %ecx -+ shr $3, %ecx -+ jz L(8skip) -+ -+ .p2align 4 -+ -+L(8loop): -+ dec %ecx -+ -+ mov (%rsi), %rax -+ mov %rax, (%rdi) -+ -+ lea 8 (%rsi), %rsi -+ lea 8 (%rdi), %rdi -+ -+ jnz L(8loop) -+ -+L(8skip): -+ and $7, %edx # check for left overs -+ pop %rax -+ jnz L(1) -+ -+ rep -+ ret -+ -+ .p2align 4 -+ -+L(8after): -+ -+L(aligntry): -+ mov %edi, %r8d # align by destination -+ -+ and $7, %r8d -+ jz L(alignafter) # not unaligned -+ -+L(align): # align -+ lea -8 (%r8, %rdx), %rdx -+ sub $8, %r8d -+ -+ .p2align 4 -+ -+L(alignloop): -+ inc %r8d -+ -+ mov (%rsi), %al -+ mov %al, (%rdi) -+ -+ lea 1 (%rsi), %rsi -+ lea 1 (%rdi), %rdi -+ -+ jnz L(alignloop) -+ -+ .p2align 4 -+ -+L(alignafter): -+ -+L(32try): # up to 1KB -+ cmp $1024, %rdx -+ ja L(32after) -+ -+L(32): # 32-byte loop -+ mov %edx, %ecx -+ shr $5, %ecx -+ jz L(32skip) -+ -+ .p2align 4 -+ -+L(32loop): -+ dec %ecx -+ -+ mov (%rsi), %rax -+ mov 8 (%rsi), %r8 -+ mov 16 (%rsi), %r9 -+ mov 24 (%rsi), %r10 -+ -+ mov %rax, (%rdi) -+ mov %r8, 8 (%rdi) -+ mov %r9, 16 (%rdi) -+ mov %r10, 24 (%rdi) -+ -+ lea 32 (%rsi), %rsi -+ lea 32 (%rdi), %rdi -+ -+ jz L(32skip) - -- /* We need this in any case. */ -- cld -+ dec %ecx - -- jbe 1f -+ mov (%rsi), %rax -+ mov 8 (%rsi), %r8 -+ mov 16 (%rsi), %r9 -+ mov 24 (%rsi), %r10 - -- /* Align destination. */ -- movq %rdi, %rax -- negq %rax -- andq $7, %rax -- subq %rax, %rcx -- xchgq %rax, %rcx -+ mov %rax, (%rdi) -+ mov %r8, 8 (%rdi) -+ mov %r9, 16 (%rdi) -+ mov %r10, 24 (%rdi) - -- rep; movsb -+ lea 32 (%rsi), %rsi -+ lea 32 (%rdi), %rdi - -- movq %rax, %rcx -- subq $32, %rcx -- js 2f -+ jnz L(32loop) - - .p2align 4 --3: - -- /* Now correct the loop counter. Please note that in the following -- code the flags are not changed anymore. */ -- subq $32, %rcx -+L(32skip): -+ and $31, %edx # check for left overs -+ jnz L(8) -+ -+ pop %rax -+ ret -+ -+ .p2align 4 -+ -+L(32after): -+ -+L(fasttry): # first 1/2 L1 -+#ifdef PIC -+# ifdef SHARED -+ mov _rtld_local_ro@GOTPCREL (%rip), %r11 -+ mov RTLD_GLOBAL_DL_CACHE1SIZEHALF (%r11), %r11 -+# else -+ mov _dl_cache1sizehalf@GOTPCREL (%rip), %r11 -+ mov (%r11), %r11 -+# endif -+#else -+ mov _dl_cache1sizehalf, %r11 -+#endif -+ cmp %rdx, %r11 -+ cmova %rdx, %r11 - -- movq (%rsi), %rax -- movq 8(%rsi), %rdx -- movq 16(%rsi), %r8 -- movq 24(%rsi), %r9 -- movq %rax, (%rdi) -- movq %rdx, 8(%rdi) -- movq %r8, 16(%rdi) -- movq %r9, 24(%rdi) -+L(fast): # good ol' MOVS -+ mov %r11, %rcx -+ and $-8, %r11 -+ shr $3, %rcx -+ jz L(fastskip) -+ -+ rep -+ movsq -+ -+L(fastskip): -+ sub %r11, %rdx # check for more -+ test $-8, %rdx -+ jnz L(fastafter) -+ -+ and $7, %edx # check for left overs -+ pop %rax -+ jnz L(1) - -- leaq 32(%rsi), %rsi -- leaq 32(%rdi), %rdi -+ rep -+ ret - -- jns 3b -+ .p2align 4 - -- /* Correct extra loop counter modification. */ --2: addq $32, %rcx --1: rep; movsb -+L(fastafter): - --#if MEMPCPY_P -- movq %rdi, %rax /* Set return value. */ -+L(pretry): # first 1/2 L2 -+#ifdef PIC -+# ifdef SHARED -+ mov _rtld_local_ro@GOTPCREL (%rip), %r8 -+ mov RTLD_GLOBAL_DL_CACHE2SIZEHALF (%r8), %r8 - #else -- movq %r10, %rax /* Set return value. */ -+ mov _dl_cache2sizehalf@GOTPCREL (%rip), %r8 -+ mov (%r8), %r8 -+# endif -+#else -+ mov _dl_cache2sizehalf, %r8 -+#endif -+ cmp %rdx, %r8 -+ cmova %rdx, %r8 - -+L(pre): # 64-byte with prefetching -+ mov %r8, %rcx -+ and $-64, %r8 -+ shr $6, %rcx -+ jz L(preskip) -+ -+ push %r14 -+ push %r13 -+ push %r12 -+ push %rbx -+ -+#ifdef PIC -+# ifdef SHARED -+ mov _rtld_local_ro@GOTPCREL (%rip), %rax -+ cmpl $0, RTLD_GLOBAL_DL_PREFETCHW (%rax) -+# else -+ mov _dl_prefetchw@GOTPCREL (%rip), %rax -+ cmpl $0, (%rax) -+# endif -+#else -+ cmpl $0, _dl_prefetchw - #endif -+ jz L(preloop) -+ -+ .p2align 4 -+ -+L(prewloop): # to state M -+ dec %rcx -+ -+ mov (%rsi), %rax -+ mov 8 (%rsi), %rbx -+ mov 16 (%rsi), %r9 -+ mov 24 (%rsi), %r10 -+ mov 32 (%rsi), %r11 -+ mov 40 (%rsi), %r12 -+ mov 48 (%rsi), %r13 -+ mov 56 (%rsi), %r14 -+ -+ prefetcht0 0 + 896 (%rsi) -+ prefetcht0 64 + 896 (%rsi) -+ -+ mov %rax, (%rdi) -+ mov %rbx, 8 (%rdi) -+ mov %r9, 16 (%rdi) -+ mov %r10, 24 (%rdi) -+ mov %r11, 32 (%rdi) -+ mov %r12, 40 (%rdi) -+ mov %r13, 48 (%rdi) -+ mov %r14, 56 (%rdi) -+ -+ lea 64 (%rsi), %rsi -+ lea 64 (%rdi), %rdi -+ -+ jz L(prebail) -+ -+ dec %rcx -+ -+ mov (%rsi), %rax -+ mov 8 (%rsi), %rbx -+ mov 16 (%rsi), %r9 -+ mov 24 (%rsi), %r10 -+ mov 32 (%rsi), %r11 -+ mov 40 (%rsi), %r12 -+ mov 48 (%rsi), %r13 -+ mov 56 (%rsi), %r14 -+ -+ mov %rax, (%rdi) -+ mov %rbx, 8 (%rdi) -+ mov %r9, 16 (%rdi) -+ mov %r10, 24 (%rdi) -+ mov %r11, 32 (%rdi) -+ mov %r12, 40 (%rdi) -+ mov %r13, 48 (%rdi) -+ mov %r14, 56 (%rdi) -+ -+ prefetchw 896 - 64 (%rdi) -+ prefetchw 896 - 0 (%rdi) -+ -+ lea 64 (%rsi), %rsi -+ lea 64 (%rdi), %rdi -+ -+ jnz L(prewloop) -+ jmp L(prebail) -+ -+ .p2align 4 -+ -+L(preloop): # to state E -+ dec %rcx -+ -+ mov (%rsi), %rax -+ mov 8 (%rsi), %rbx -+ mov 16 (%rsi), %r9 -+ mov 24 (%rsi), %r10 -+ mov 32 (%rsi), %r11 -+ mov 40 (%rsi), %r12 -+ mov 48 (%rsi), %r13 -+ mov 56 (%rsi), %r14 -+ -+ prefetcht0 896 + 0 (%rsi) -+ prefetcht0 896 + 64 (%rsi) -+ -+ mov %rax, (%rdi) -+ mov %rbx, 8 (%rdi) -+ mov %r9, 16 (%rdi) -+ mov %r10, 24 (%rdi) -+ mov %r11, 32 (%rdi) -+ mov %r12, 40 (%rdi) -+ mov %r13, 48 (%rdi) -+ mov %r14, 56 (%rdi) -+ -+ lea 64 (%rsi), %rsi -+ lea 64 (%rdi), %rdi -+ -+ jz L(prebail) -+ -+ dec %rcx -+ -+ mov (%rsi), %rax -+ mov 8 (%rsi), %rbx -+ mov 16 (%rsi), %r9 -+ mov 24 (%rsi), %r10 -+ mov 32 (%rsi), %r11 -+ mov 40 (%rsi), %r12 -+ mov 48 (%rsi), %r13 -+ mov 56 (%rsi), %r14 -+ -+ prefetcht0 896 - 64 (%rdi) -+ prefetcht0 896 - 0 (%rdi) -+ -+ mov %rax, (%rdi) -+ mov %rbx, 8 (%rdi) -+ mov %r9, 16 (%rdi) -+ mov %r10, 24 (%rdi) -+ mov %r11, 32 (%rdi) -+ mov %r12, 40 (%rdi) -+ mov %r13, 48 (%rdi) -+ mov %r14, 56 (%rdi) -+ -+ lea 64 (%rsi), %rsi -+ lea 64 (%rdi), %rdi -+ -+ jnz L(preloop) -+ -+L(prebail): -+ pop %rbx -+ pop %r12 -+ pop %r13 -+ pop %r14 -+ -+# .p2align 4 -+ -+L(preskip): -+ sub %r8, %rdx # check for more -+ test $-64, %rdx -+ jnz L(preafter) -+ -+ and $63, %edx # check for left overs -+ jnz L(32) -+ -+ pop %rax -+ ret -+ -+ .p2align 4 -+ -+L(preafter): -+ -+L(NTtry): -+ -+L(NT): # NT 128-byte -+ mov %rdx, %rcx -+ shr $7, %rcx -+ jz L(NTskip) -+ -+ push %r14 -+ push %r13 -+ push %r12 -+ -+ .p2align 4 -+ -+L(NTloop): -+ prefetchnta 768 (%rsi) -+ prefetchnta 832 (%rsi) -+ -+ dec %rcx -+ -+ mov (%rsi), %rax -+ mov 8 (%rsi), %r8 -+ mov 16 (%rsi), %r9 -+ mov 24 (%rsi), %r10 -+ mov 32 (%rsi), %r11 -+ mov 40 (%rsi), %r12 -+ mov 48 (%rsi), %r13 -+ mov 56 (%rsi), %r14 -+ -+ movnti %rax, (%rdi) -+ movnti %r8, 8 (%rdi) -+ movnti %r9, 16 (%rdi) -+ movnti %r10, 24 (%rdi) -+ movnti %r11, 32 (%rdi) -+ movnti %r12, 40 (%rdi) -+ movnti %r13, 48 (%rdi) -+ movnti %r14, 56 (%rdi) -+ -+ mov 64 (%rsi), %rax -+ mov 72 (%rsi), %r8 -+ mov 80 (%rsi), %r9 -+ mov 88 (%rsi), %r10 -+ mov 96 (%rsi), %r11 -+ mov 104 (%rsi), %r12 -+ mov 112 (%rsi), %r13 -+ mov 120 (%rsi), %r14 -+ -+ movnti %rax, 64 (%rdi) -+ movnti %r8, 72 (%rdi) -+ movnti %r9, 80 (%rdi) -+ movnti %r10, 88 (%rdi) -+ movnti %r11, 96 (%rdi) -+ movnti %r12, 104 (%rdi) -+ movnti %r13, 112 (%rdi) -+ movnti %r14, 120 (%rdi) -+ -+ lea 128 (%rsi), %rsi -+ lea 128 (%rdi), %rdi -+ -+ jnz L(NTloop) -+ -+ mfence # serialize memory operations -+ -+ pop %r12 -+ pop %r13 -+ pop %r14 -+ -+L(NTskip): -+ and $127, %edx # check for left overs -+ jnz L(32) -+ -+ pop %rax - ret - --END (BP_SYM (memcpy)) --#if !MEMPCPY_P -+END (memcpy) -+ -+#ifndef USE_AS_MEMPCPY - libc_hidden_builtin_def (memcpy) - #endif -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/mempcpy.S libc/sysdeps/x86_64/mempcpy.S ---- libc/sysdeps/x86_64/mempcpy.S 2004-10-17 23:17:08.000000000 -0500 -+++ libc/sysdeps/x86_64/mempcpy.S 2006-05-05 15:24:18.279191000 -0500 -@@ -1,3 +1,4 @@ -+#define USE_AS_MEMPCPY - #define memcpy __mempcpy - #define __memcpy_chk __mempcpy_chk - #include -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/memset.S libc/sysdeps/x86_64/memset.S ---- libc/sysdeps/x86_64/memset.S 2005-03-31 04:00:13.000000000 -0600 -+++ libc/sysdeps/x86_64/memset.S 2006-05-15 11:38:13.737756000 -0500 -@@ -1,145 +1,322 @@ --/* memset/bzero -- set memory area to CH/0 -- Optimized version for x86-64. -- Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. -- This file is part of the GNU C Library. -- Contributed by Andreas Jaeger . -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -+# (c) 2002 Advanced Micro Devices, Inc. -+# YOUR USE OF THIS CODE IS SUBJECT TO THE TERMS -+# AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC -+# LICENSE FOUND IN THE "README" FILE THAT IS -+# INCLUDED WITH THIS FILE - - #include - #include "asm-syntax.h" - #include "bp-sym.h" - #include "bp-asm.h" -+#if defined PIC && defined SHARED -+# include -+#endif - --/* BEWARE: `#ifdef memset' means that memset is redefined as `bzero' */ --#define BZERO_P (defined memset) -- --/* This is somehow experimental and could made dependend on the cache -- size. */ --#define LARGE $120000 -+#if defined PIC && defined SHARED -+ .globl _rtld_local_ro -+ .hidden _rtld_local_ro -+ .set _rtld_local_ro,_rtld_global_ro -+#endif - - .text --#if !BZERO_P && defined PIC && !defined NOT_IN_libc -+#if !defined USE_AS_BZERO && defined PIC && !defined NOT_IN_libc - ENTRY (__memset_chk) - cmpq %rdx, %rcx - jb HIDDEN_JUMPTARGET (__chk_fail) - END (__memset_chk) - #endif --ENTRY (memset) --#if BZERO_P -- mov %rsi,%rdx /* Adjust parameter. */ -- xorl %esi,%esi /* Fill with 0s. */ --#endif -- cmp $0x7,%rdx /* Check for small length. */ -- mov %rdi,%rcx /* Save ptr as return value. */ -- jbe 7f - --#if BZERO_P -- mov %rsi,%r8 /* Just copy 0. */ -+ENTRY (memset) # (void *, const void*, size_t) -+ -+#ifdef USE_AS_BZERO -+ mov %rsi, %rdx # memset doubles as bzero -+ xorl %esi, %esi -+#else -+ mov $0x0101010101010101, %rcx # memset is itself -+ movzx %sil, %rsi -+ imul %rcx, %rsi # replicate 8 times -+#endif -+ -+L(try1): # up to 64B -+ cmp $64, %rdx -+ mov %rdi, %rax # return memory block address (even for bzero ()) -+ jae L(1after) -+ -+L(1): # 1-byte loop -+ test $1, %dl -+ jz L(1a) -+ -+ mov %sil, (%rdi) -+ inc %rdi -+ -+L(1a): -+ test $2, %dl -+ jz L(1b) -+ -+ mov %si, (%rdi) -+ add $2, %rdi -+ -+L(1b): -+ test $4, %dl -+ jz L(1c) -+ -+ mov %esi, (%rdi) -+ add $4, %rdi -+ -+L(1c): -+ test $8, %dl -+ jz L(1d) -+ -+ mov %rsi, (%rdi) -+ add $8, %rdi -+ -+L(1d): -+ test $16, %dl -+ jz L(1e) -+ -+ mov %rsi, (%rdi) -+ mov %rsi, 8 (%rdi) -+ add $16, %rdi -+ -+L(1e): -+ test $32, %dl -+ jz L(1f) -+ -+ mov %rsi, (%rdi) -+ mov %rsi, 8 (%rdi) -+ mov %rsi, 16 (%rdi) -+ mov %rsi, 24 (%rdi) -+# add $32, %rdi -+ -+L(1f): -+ -+L(exit): -+ rep -+ ret -+ -+ .p2align 4 -+ -+L(1after): -+ -+L(32try): # up to 512B -+ cmp $512, %rdx -+ ja L(32after) -+ -+L(32): # 32-byte loop -+ mov %edx, %ecx -+ shr $5, %ecx -+ jz L(32skip) -+ -+ .p2align 4 -+ -+L(32loop): -+ dec %ecx -+ -+ mov %rsi, (%rdi) -+ mov %rsi, 8 (%rdi) -+ mov %rsi, 16 (%rdi) -+ mov %rsi, 24 (%rdi) -+ -+ lea 32 (%rdi), %rdi -+ -+ jz L(32skip) -+ -+ dec %ecx -+ -+ mov %rsi, (%rdi) -+ mov %rsi, 8 (%rdi) -+ mov %rsi, 16 (%rdi) -+ mov %rsi, 24 (%rdi) -+ -+ lea 32 (%rdi), %rdi -+ -+ jnz L(32loop) -+ -+ .p2align 4 -+ -+L(32skip): -+ and $31, %edx # check for left overs -+ jnz L(1) -+ -+ rep -+ ret -+ -+ .p2align 4 -+ -+L(32after): -+ -+L(aligntry): -+ mov %edi, %ecx # align by destination -+ -+ and $7, %ecx # skip if already aligned -+ jz L(alignafter) -+ -+L(align): # align loop -+ lea -8 (%rcx, %rdx), %rdx -+ sub $8, %ecx -+ -+ .p2align 4 -+ -+L(alignloop): -+ inc %ecx -+ -+ mov %sil, (%rdi) -+ lea 1 (%rdi), %rdi -+ -+ jnz L(alignloop) -+ -+ .p2align 4 -+ -+L(alignafter): -+ -+# For MP System half cache size is better, -+# for UP full cache size is better. -+# Use half cache size only. -+L(fasttry): # between 2KB and 1/2 L2 -+#ifdef PIC -+# ifdef SHARED -+ mov _rtld_local_ro@GOTPCREL (%rip), %r8 -+ mov RTLD_GLOBAL_DL_CACHE2SIZEHALF (%r8), %r8 - #else -- /* Populate 8 bit data to full 64-bit. */ -- movabs $0x0101010101010101,%r8 -- movzbl %sil,%eax -- imul %rax,%r8 --#endif -- test $0x7,%edi /* Check for alignment. */ -- je 2f -- -- .p2align 4 --1: /* Align ptr to 8 byte. */ -- mov %sil,(%rcx) -- dec %rdx -- inc %rcx -- test $0x7,%ecx -- jne 1b -- --2: /* Check for really large regions. */ -- mov %rdx,%rax -- shr $0x6,%rax -- je 4f -- cmp LARGE, %rdx -- jae 11f -- -- .p2align 4 --3: /* Copy 64 bytes. */ -- mov %r8,(%rcx) -- mov %r8,0x8(%rcx) -- mov %r8,0x10(%rcx) -- mov %r8,0x18(%rcx) -- mov %r8,0x20(%rcx) -- mov %r8,0x28(%rcx) -- mov %r8,0x30(%rcx) -- mov %r8,0x38(%rcx) -- add $0x40,%rcx -- dec %rax -- jne 3b -- --4: /* Copy final bytes. */ -- and $0x3f,%edx -- mov %rdx,%rax -- shr $0x3,%rax -- je 6f -- --5: /* First in chunks of 8 bytes. */ -- mov %r8,(%rcx) -- add $0x8,%rcx -- dec %rax -- jne 5b --6: -- and $0x7,%edx --7: -- test %rdx,%rdx -- je 9f --8: /* And finally as bytes (up to 7). */ -- mov %sil,(%rcx) -- inc %rcx -- dec %rdx -- jne 8b --9: --#if BZERO_P -- nop -+ mov _dl_cache2sizehalf@GOTPCREL (%rip), %r8 -+ mov (%r8), %r8 -+# endif - #else -- /* Load result (only if used as memset). */ -- mov %rdi,%rax /* start address of destination is result */ -+ mov _dl_cache2sizehalf, %r8 - #endif -- retq -+ cmp %rdx, %r8 -+ cmova %rdx, %r8 -+ -+ cmp $2048, %rdx # this is slow for some block sizes -+ jb L(64) -+ -+L(fast): # microcode loop -+ mov %r8, %rcx -+ and $-8, %r8 -+ shr $3, %rcx -+ -+ xchg %rax, %rsi -+ -+ rep -+ stosq -+ -+ xchg %rax, %rsi -+ -+L(fastskip): -+ sub %r8, %rdx # check for more -+ ja L(64after) -+ -+ and $7, %edx # check for left overs -+ jnz L(1) -+ -+ rep -+ ret - - .p2align 4 --11: /* Copy 64 bytes without polluting the cache. */ -- /* We could use movntdq %xmm0,(%rcx) here to further -- speed up for large cases but let's not use XMM registers. */ -- movnti %r8,(%rcx) -- movnti %r8,0x8(%rcx) -- movnti %r8,0x10(%rcx) -- movnti %r8,0x18(%rcx) -- movnti %r8,0x20(%rcx) -- movnti %r8,0x28(%rcx) -- movnti %r8,0x30(%rcx) -- movnti %r8,0x38(%rcx) -- add $0x40,%rcx -- dec %rax -- jne 11b -- jmp 4b -+ -+L(fastafter): -+ -+L(64try): # up to 2KB -+ -+L(64): # 64-byte loop -+ mov %r8, %rcx -+ and $-64, %r8 -+ shr $6, %rcx -+ -+ dec %rcx # this iteration starts the prefetcher sooner -+ -+ mov %rsi, (%rdi) -+ mov %rsi, 8 (%rdi) -+ mov %rsi, 16 (%rdi) -+ mov %rsi, 24 (%rdi) -+ mov %rsi, 32 (%rdi) -+ mov %rsi, 40 (%rdi) -+ mov %rsi, 48 (%rdi) -+ mov %rsi, 56 (%rdi) -+ -+ lea 64 (%rdi), %rdi -+ -+ .p2align 4 -+ -+L(64loop): -+ dec %rcx -+ -+ mov %rsi, (%rdi) -+ mov %rsi, 8 (%rdi) -+ mov %rsi, 16 (%rdi) -+ mov %rsi, 24 (%rdi) -+ mov %rsi, 32 (%rdi) -+ mov %rsi, 40 (%rdi) -+ mov %rsi, 48 (%rdi) -+ mov %rsi, 56 (%rdi) -+ -+ lea 64 (%rdi), %rdi -+ -+ jnz L(64loop) -+ -+L(64skip): -+ sub %r8, %rdx # check for more -+ ja L(64after) -+ -+ and $63, %edx # check for left overs -+ jnz L(32) -+ -+ rep -+ ret -+ -+ .p2align 4 -+ -+L(64after): -+ -+L(NTtry): -+ -+L(NT): # 128-byte NT loop -+ mov %rdx, %rcx -+ shr $7, %rcx -+ jz L(NTskip) -+ -+ .p2align 4 -+ -+L(NTloop): # on an MP system it would be better to prefetchnta 320 (%rdi) and 384 (%rdi) here, but not so on an 1P system -+ dec %rcx -+ -+ movnti %rsi, (%rdi) -+ movnti %rsi, 8 (%rdi) -+ movnti %rsi, 16 (%rdi) -+ movnti %rsi, 24 (%rdi) -+ movnti %rsi, 32 (%rdi) -+ movnti %rsi, 40 (%rdi) -+ movnti %rsi, 48 (%rdi) -+ movnti %rsi, 56 (%rdi) -+ movnti %rsi, 64 (%rdi) -+ movnti %rsi, 72 (%rdi) -+ movnti %rsi, 80 (%rdi) -+ movnti %rsi, 88 (%rdi) -+ movnti %rsi, 96 (%rdi) -+ movnti %rsi, 104 (%rdi) -+ movnti %rsi, 112 (%rdi) -+ movnti %rsi, 120 (%rdi) -+ -+ lea 128 (%rdi), %rdi -+ -+ jnz L(NTloop) -+ -+ mfence # serialize memory operations -+ -+L(NTskip): -+ and $127, %edx # check for left overs -+ jnz L(32) -+ -+ rep -+ ret - - END (memset) --#if !BZERO_P -+ -+#ifndef USE_AS_BZERO - libc_hidden_builtin_def (memset) - #endif - --#if !BZERO_P && defined PIC && !defined NOT_IN_libc -+#if !defined USE_AS_BZERO && defined PIC && !defined NOT_IN_libc - strong_alias (__memset_chk, __memset_zero_constant_len_parameter) - .section .gnu.warning.__memset_zero_constant_len_parameter - .string "memset used with constant zero length parameter; this could be due to transposed parameters" -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/stpcpy.S libc/sysdeps/x86_64/stpcpy.S ---- libc/sysdeps/x86_64/stpcpy.S 2004-05-28 01:39:37.000000000 -0500 -+++ libc/sysdeps/x86_64/stpcpy.S 2006-05-05 15:24:41.775991000 -0500 -@@ -1,5 +1,5 @@ - #define USE_AS_STPCPY --#define STRCPY __stpcpy -+#define strcpy __stpcpy - - #include - -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/stpncpy.S libc/sysdeps/x86_64/stpncpy.S ---- libc/sysdeps/x86_64/stpncpy.S 1969-12-31 18:00:00.000000000 -0600 -+++ libc/sysdeps/x86_64/stpncpy.S 2006-05-05 15:24:50.748541000 -0500 -@@ -0,0 +1,9 @@ -+#define USE_AS_STRNCPY -+#define USE_AS_STPCPY -+#define strcpy __stpncpy -+ -+#include -+ -+weak_alias (__stpncpy, stpncpy) -+libc_hidden_def (__stpncpy) -+libc_hidden_builtin_def (stpncpy) -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/strcpy.S libc/sysdeps/x86_64/strcpy.S ---- libc/sysdeps/x86_64/strcpy.S 2003-04-29 17:47:18.000000000 -0500 -+++ libc/sysdeps/x86_64/strcpy.S 2006-05-19 13:41:31.281326000 -0500 -@@ -1,159 +1,1141 @@ --/* strcpy/stpcpy implementation for x86-64. -- Copyright (C) 2002 Free Software Foundation, Inc. -- This file is part of the GNU C Library. -- Contributed by Andreas Jaeger , 2002. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -- --#include --#include "asm-syntax.h" --#include "bp-sym.h" --#include "bp-asm.h" -+# (c) 2002 Advanced Micro Devices, Inc. -+# YOUR USE OF THIS CODE IS SUBJECT TO THE TERMS -+# AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC -+# LICENSE FOUND IN THE "README" FILE THAT IS -+# INCLUDED WITH THIS FILE -+ -+#include "sysdep.h" -+#if defined PIC && defined SHARED -+# include -+#endif - --#ifndef USE_AS_STPCPY --# define STRCPY strcpy -+#if defined PIC && defined SHARED -+ .globl _rtld_local_ro -+ .hidden _rtld_local_ro -+ .set _rtld_local_ro,_rtld_global_ro - #endif - - .text --ENTRY (BP_SYM (STRCPY)) -- movq %rsi, %rcx /* Source register. */ -- andl $7, %ecx /* mask alignment bits */ -- movq %rdi, %rdx /* Duplicate destination pointer. */ -- -- jz 5f /* aligned => start loop */ -- -- neg %ecx /* We need to align to 8 bytes. */ -- addl $8,%ecx -- /* Search the first bytes directly. */ --0: -- movb (%rsi), %al /* Fetch a byte */ -- testb %al, %al /* Is it NUL? */ -- movb %al, (%rdx) /* Store it */ -- jz 4f /* If it was NUL, done! */ -- incq %rsi -- incq %rdx -- decl %ecx -- jnz 0b -- --5: -- movq $0xfefefefefefefeff,%r8 -- -- /* Now the sources is aligned. Unfortunatly we cannot force -- to have both source and destination aligned, so ignore the -- alignment of the destination. */ -+ -+ENTRY (strcpy) # (char *, const char *) -+ -+#ifdef USE_AS_STRNCPY // (char *, const char *, size_t) -+ test %rdx, %rdx -+ mov %rdx, %r11 -+ jz L(exit) # early exit -+#endif -+ -+ xor %edx, %edx -+ -+L(aligntry): # between 0 and 7 bytes -+ mov %rsi, %r8 # align by source -+ and $7, %r8 -+ jz L(alignafter) -+ -+L(align): # 8-byte align -+ sub $8, %r8 -+ - .p2align 4 --1: -- /* 1st unroll. */ -- movq (%rsi), %rax /* Read double word (8 bytes). */ -- addq $8, %rsi /* Adjust pointer for next word. */ -- movq %rax, %r9 /* Save a copy for NUL finding. */ -- addq %r8, %r9 /* add the magic value to the word. We get -- carry bits reported for each byte which -- is *not* 0 */ -- jnc 3f /* highest byte is NUL => return pointer */ -- xorq %rax, %r9 /* (word+magic)^word */ -- orq %r8, %r9 /* set all non-carry bits */ -- incq %r9 /* add 1: if one carry bit was *not* set -- the addition will not result in 0. */ -- -- jnz 3f /* found NUL => return pointer */ -- -- movq %rax, (%rdx) /* Write value to destination. */ -- addq $8, %rdx /* Adjust pointer. */ -- -- /* 2nd unroll. */ -- movq (%rsi), %rax /* Read double word (8 bytes). */ -- addq $8, %rsi /* Adjust pointer for next word. */ -- movq %rax, %r9 /* Save a copy for NUL finding. */ -- addq %r8, %r9 /* add the magic value to the word. We get -- carry bits reported for each byte which -- is *not* 0 */ -- jnc 3f /* highest byte is NUL => return pointer */ -- xorq %rax, %r9 /* (word+magic)^word */ -- orq %r8, %r9 /* set all non-carry bits */ -- incq %r9 /* add 1: if one carry bit was *not* set -- the addition will not result in 0. */ -- -- jnz 3f /* found NUL => return pointer */ -- -- movq %rax, (%rdx) /* Write value to destination. */ -- addq $8, %rdx /* Adjust pointer. */ -- -- /* 3rd unroll. */ -- movq (%rsi), %rax /* Read double word (8 bytes). */ -- addq $8, %rsi /* Adjust pointer for next word. */ -- movq %rax, %r9 /* Save a copy for NUL finding. */ -- addq %r8, %r9 /* add the magic value to the word. We get -- carry bits reported for each byte which -- is *not* 0 */ -- jnc 3f /* highest byte is NUL => return pointer */ -- xorq %rax, %r9 /* (word+magic)^word */ -- orq %r8, %r9 /* set all non-carry bits */ -- incq %r9 /* add 1: if one carry bit was *not* set -- the addition will not result in 0. */ -- -- jnz 3f /* found NUL => return pointer */ -- -- movq %rax, (%rdx) /* Write value to destination. */ -- addq $8, %rdx /* Adjust pointer. */ -- -- /* 4th unroll. */ -- movq (%rsi), %rax /* Read double word (8 bytes). */ -- addq $8, %rsi /* Adjust pointer for next word. */ -- movq %rax, %r9 /* Save a copy for NUL finding. */ -- addq %r8, %r9 /* add the magic value to the word. We get -- carry bits reported for each byte which -- is *not* 0 */ -- jnc 3f /* highest byte is NUL => return pointer */ -- xorq %rax, %r9 /* (word+magic)^word */ -- orq %r8, %r9 /* set all non-carry bits */ -- incq %r9 /* add 1: if one carry bit was *not* set -- the addition will not result in 0. */ -- -- jnz 3f /* found NUL => return pointer */ -- -- movq %rax, (%rdx) /* Write value to destination. */ -- addq $8, %rdx /* Adjust pointer. */ -- jmp 1b /* Next iteration. */ - -- /* Do the last few bytes. %rax contains the value to write. -- The loop is unrolled twice. */ -+L(alignloop): -+ movzbl (%rsi, %rdx), %eax -+ test %al, %al # check if character a NUL -+ mov %al, (%rdi, %rdx) -+ jz L(exit) -+ -+ inc %edx -+ -+#ifdef USE_AS_STRNCPY -+ dec %r11 -+ jz L(exit) -+#endif -+ -+ inc %r8 -+ jnz L(alignloop) -+ -+ .p2align 4,, 7 -+ -+L(alignafter): -+ -+L(8try): # up to 64 bytes -+ mov $0xfefefefefefefeff, %rcx -+ -+L(8): # 8-byte loop -+ -+L(8loop): -+#ifdef USE_AS_STRNCPY -+ sub $8, %r11 -+ jbe L(tail) -+#endif -+ -+ mov (%rsi, %rdx), %rax -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ jnc L(tail) # sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ inc %r8 # sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ jnc L(tail) # sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ inc %r8 # sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ jnc L(tail) # sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ inc %r8 # sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ jnc L(tail) # sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ inc %r8 # sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ jnc L(tail) # sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ inc %r8 # sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ jnc L(tail) # sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ inc %r8 # sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ jnc L(tail) # sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ inc %r8 # sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ jnc L(tail) # sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ inc %r8 # sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+ add $8, %edx -+ -+L(8after): # up to 64 bytes -+ -+L(64try): # up to 1/2 L1 -+#ifdef PIC -+# ifdef SHARED -+ mov _rtld_local_ro@GOTPCREL (%rip), %r9 -+ mov RTLD_GLOBAL_DL_CACHE1SIZEHALF (%r9), %r9 -+# else -+ mov _dl_cache1sizehalf@GOTPCREL (%rip), %r9 -+ mov (%r9), %r9 -+# endif -+#else -+ mov _dl_cache1sizehalf, %r9 -+#endif -+ -+L(64): # 64-byte loop -+ - .p2align 4 --3: -- /* Note that stpcpy needs to return with the value of the NUL -- byte. */ -- movb %al, (%rdx) /* 1st byte. */ -- testb %al, %al /* Is it NUL. */ -- jz 4f /* yes, finish. */ -- incq %rdx /* Increment destination. */ -- movb %ah, (%rdx) /* 2nd byte. */ -- testb %ah, %ah /* Is it NUL?. */ -- jz 4f /* yes, finish. */ -- incq %rdx /* Increment destination. */ -- shrq $16, %rax /* Shift... */ -- jmp 3b /* and look at next two bytes in %rax. */ - --4: -+L(64loop): -+#ifdef USE_AS_STRNCPY -+ sub $8, %r11 -+ jbe L(tail) -+#endif -+ -+ mov (%rsi, %rdx), %rax -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+ add $8, %edx -+ -+ cmp %r9, %rdx -+ jbe L(64loop) -+ -+L(64after): # up to 1/2 L1 -+ -+L(pretry): # up to 1/2 L2 -+#ifdef PIC -+# ifdef SHARED -+ mov _rtld_local_ro@GOTPCREL (%rip), %r9 -+ cmpl $0, RTLD_GLOBAL_DL_PREFETCHW (%r9) -+ mov RTLD_GLOBAL_DL_CACHE2SIZEHALF (%r9), %r9 -+# else -+ mov _dl_prefetchw@GOTPCREL (%rip), %r9 -+ cmpl $0, (%r9) -+ mov _dl_cache2sizehalf@GOTPCREL (%rip), %r9 -+ mov (%r9), %r9 -+# endif -+#else -+ cmpl $0, _dl_prefetchw -+ mov _dl_cache2sizehalf, %r9 -+#endif -+ jz L(preloop) # check for availability of PREFETCHW -+ -+ .p2align 4 -+ -+L(prewloop): # 64-byte with prefetching to state M -+#ifdef USE_AS_STRNCPY -+ sub $8, %r11 -+ jbe L(tail) -+#endif -+ -+ mov (%rsi, %rdx), %rax -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+ prefetchw 512 + 8 (%rdi, %rdx) -+ prefetcht0 512 + 8 (%rsi, %rdx) -+ -+ add $8, %edx -+ -+ cmp %r9, %rdx -+ jb L(prewloop) -+ jmp L(preafter) -+ -+L(prewafter): # up to 1/2 L2 -+ -+ .p2align 4 -+ -+L(preloop): # 64-byte with prefetching to state E -+#ifdef USE_AS_STRNCPY -+ sub $8, %r11 -+ jbe L(tail) -+#endif -+ -+ mov (%rsi, %rdx), %rax -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %edx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %edx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(tail) -+ -+ mov %rax, (%rdi, %rdx) -+ -+ prefetcht0 512 + 8 (%rdi, %rdx) -+ prefetcht0 512 + 8 (%rsi, %rdx) -+ -+ add $8, %edx -+ -+ cmp %r9, %rdx -+ jb L(preloop) -+ -+ .p2align 4 -+ -+L(preafter): # up to 1/2 of L2 -+ -+L(NTtry): -+ mfence -+ -+L(NT): # 64-byte NT -+ -+ .p2align 4 -+ -+L(NTloop): -+#ifdef USE_AS_STRNCPY -+ sub $8, %r11 -+ jbe L(tail) -+#endif -+ -+ mov (%rsi, %rdx), %rax -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(NTtail) -+ -+ movnti %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %rdx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %rdx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(NTtail) -+ -+ movnti %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %rdx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %rdx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(NTtail) -+ -+ movnti %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %rdx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %rdx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(NTtail) -+ -+ movnti %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %rdx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %rdx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(NTtail) -+ -+ movnti %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %rdx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %rdx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(NTtail) -+ -+ movnti %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %rdx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %rdx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(NTtail) -+ -+ movnti %rax, (%rdi, %rdx) -+ -+#ifdef USE_AS_STRNCPY -+ add $8, %rdx -+ -+ sub $8, %r11 -+ jbe L(tail) -+ -+ mov (%rsi, %rdx), %rax -+#else -+ mov 8 (%rsi, %rdx), %rax -+ add $8, %rdx -+#endif -+ -+ mov %rcx, %r8 -+ add %rax, %r8 -+ sbb %r10, %r10 -+ -+ xor %rax, %r8 -+ or %rcx, %r8 -+ sub %r10, %r8 -+ jnz L(NTtail) -+ -+ movnti %rax, (%rdi, %rdx) -+ -+ prefetchnta 768 + 8 (%rsi, %rdx) -+ -+ add $8, %rdx -+ jmp L(NTloop) -+ -+ .p2align 4 -+ -+L(NTtail): -+ mfence # serialize memory operations -+ -+ .p2align 4 -+ -+L(NTafter): -+ -+L(tailtry): -+ -+L(tail): # 1-byte tail -+#ifdef USE_AS_STRNCPY -+ add $8, %r11 -+ jz L(exit) -+#endif -+ -+ .p2align 4 -+ -+L(tailloop): -+ movzbl (%rsi, %rdx), %eax -+ test %al, %al -+ mov %al, (%rdi, %rdx) -+ jz L(exit) -+ -+ inc %rdx -+ -+#ifdef USE_AS_STRNCPY -+ dec %r11 -+ jz L(exit) -+#endif -+ jmp L(tailloop) -+ -+ .p2align 4 -+ -+L(tailafter): -+ -+L(exit): -+#ifdef USE_AS_STPCPY -+ lea (%rdi, %rdx), %rax -+#else -+ mov %rdi, %rax -+#endif -+ -+#ifdef USE_AS_STRNCPY -+ test %r11, %r11 -+ mov %r11, %rcx -+ jnz 2f -+ -+ rep -+ ret -+ -+ .p2align 4 -+ -+2: - #ifdef USE_AS_STPCPY -- movq %rdx, %rax /* Destination is return value. */ -+ mov %rax, %r8 - #else -- movq %rdi, %rax /* Source is return value. */ -+ mov %rdi, %r8 -+# endif -+ -+ xor %eax, %eax # bzero () would do too, but usually there are only a handfull of bytes left -+ shr $3, %rcx -+ lea (%rdi, %rdx), %rdi -+ jz 3f -+ -+ rep stosq -+ -+ and $7, %r11d -+ jz 1f -+ -+ .p2align 4,, 4 -+ -+3: -+ mov %al, (%rdi) -+ inc %rdi -+ -+ dec %r11d -+ jnz 3b -+ -+ .p2align 4,, 4 -+ -+1: -+ mov %r8, %rax - #endif -- retq --END (BP_SYM (STRCPY)) --#ifndef USE_AS_STPCPY -+ ret -+ -+END (strcpy) -+ -+#if !defined USE_AS_STPCPY && !defined USE_AS_STRNCPY - libc_hidden_builtin_def (strcpy) - #endif -diff -Npruw -x CVS -x vssver.scc -x powerpc -x sync_file_range.c libc/sysdeps/x86_64/strncpy.S libc/sysdeps/x86_64/strncpy.S ---- libc/sysdeps/x86_64/strncpy.S 1969-12-31 18:00:00.000000000 -0600 -+++ libc/sysdeps/x86_64/strncpy.S 2006-05-05 15:25:34.559341000 -0500 -@@ -0,0 +1,8 @@ -+#define USE_AS_STRNCPY -+#define strcpy __strncpy -+ -+#include -+ -+weak_alias (__strncpy, strncpy) -+libc_hidden_def (__strncpy) -+libc_hidden_builtin_def (strncpy)