Skip to content

Slightly optimize hash add_new for packed arrays #21539

Open
iluuu1994 wants to merge 1 commit into php:master from
iluuu1994:optimize-ht-add_new-backed
Open

Slightly optimize hash add_new for packed arrays #21539
iluuu1994 wants to merge 1 commit into php:master from
iluuu1994:optimize-ht-add_new-backed

Conversation

@iluuu1994
Copy link
Member

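When add_new is called on a packed array with an index below the number of used slots, there is no need to check whether that slot is UNDEF before converting the array to a hash: for add_new we can assume the index is undefined. This eliminates the offset calculation, the slot lookup and the type check. The saving is, of course, dwarfed by the cost of the conversion to a proper hash table.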

This eliminates ~7 assembly instructions.

Before

<zend_hash_index_add_new>:
               	pushq	%rbp
               	movq	%rsp, %rbp
               	pushq	%r13
               	movq	%rdx, %r13
               	pushq	%r12
               	movq	%rsi, %r12
               	pushq	%rbx
               	movq	%rdi, %rbx
               	subq	$0x8, %rsp
               	movl	0x8(%rdi), %eax
               	testb	$0x4, %al
               	je	0x65dc80 <zend_hash_index_add_new+0xd0>
               	movl	0x18(%rdi), %ecx
               	movq	%rcx, %rax
               	cmpq	%rcx, %rsi
               	jb	0x65dd40 <zend_hash_index_add_new+0x190>
               	movl	0x20(%rdi), %esi
               	movq	%rsi, %rdx
               	cmpq	%rsi, %r12
               	jae	0x65ddb8 <zend_hash_index_add_new+0x208>
               	movq	0x10(%rdi), %rdx
               	movq	%r12, %rsi
               	shlq	$0x4, %rsi
               	leaq	(%rdx,%rsi), %rax
               	cmpq	%r12, %rcx
               	jae	0x65dc57 <zend_hash_index_add_new+0xa7>
               	shlq	$0x4, %rcx
               	addq	%rcx, %rdx
               	cmpq	%rcx, %rsi
               	je	0x65dc57 <zend_hash_index_add_new+0xa7>
               	movq	%rax, %rcx
               	subq	%rdx, %rcx
               	andl	$0x10, %ecx
               	je	0x65dc40 <zend_hash_index_add_new+0x90>
               	movl	$0x0, 0x8(%rdx)
               	addq	$0x10, %rdx
               	cmpq	%rdx, %rax
               	je	0x65dc57 <zend_hash_index_add_new+0xa7>
               	nopw	%cs:(%rax,%rax)
               	nopw	%cs:(%rax,%rax)
               	movl	$0x0, 0x8(%rdx)
               	addq	$0x20, %rdx
               	movl	$0x0, -0x8(%rdx)
               	cmpq	%rdx, %rax
               	jne	0x65dc40 <zend_hash_index_add_new+0x90>
               	movq	(%r13), %rcx
               	movl	0x8(%r13), %edx
               	leal	0x1(%r12), %edi
               	addl	$0x1, 0x1c(%rbx)
               	movl	%edi, 0x18(%rbx)
               	movq	%rdi, 0x28(%rbx)
               	movq	%rcx, (%rax)
               	movl	%edx, 0x8(%rax)
               	addq	$0x8, %rsp
               	popq	%rbx
               	popq	%r12
               	popq	%r13
               	popq	%rbp
               	retq
               	movl	0x20(%rdi), %ecx
               	testb	$0x8, %al
               	je	0x65dd10 <zend_hash_index_add_new+0x160>
               	movl	%ecx, %eax
               	cmpq	%rax, %rsi
               	jb	0x65dd70 <zend_hash_index_add_new+0x1c0>
               	callq	0x65ad80 <zend_hash_real_init_mixed>
               	movl	0x18(%rbx), %edx
               	movl	0xc(%rbx), %ecx
               	movq	0x10(%rbx), %rsi
               	leal	0x1(%rdx), %eax
               	movl	%eax, 0x18(%rbx)
               	movl	%edx, %eax
               	orl	%r12d, %ecx
               	shlq	$0x5, %rax
               	movslq	%ecx, %rcx
               	addq	%rsi, %rax
               	leaq	(%rsi,%rcx,4), %rcx
               	movl	(%rcx), %esi
               	movl	%esi, 0xc(%rax)
               	movl	%edx, (%rcx)
               	cmpq	%r12, 0x28(%rbx)
               	jg	0x65dce5 <zend_hash_index_add_new+0x135>
               	movabsq	$0x7fffffffffffffff, %rdx # imm = 0x7FFFFFFFFFFFFFFF
               	cmpq	%rdx, %r12
               	setne	%dl
               	movzbl	%dl, %edx
               	addq	%r12, %rdx
               	movq	%rdx, 0x28(%rbx)
               	movq	(%r13), %rcx
               	movl	0x8(%r13), %edx
               	addl	$0x1, 0x1c(%rbx)
               	movq	%r12, 0x10(%rax)
               	movq	$0x0, 0x18(%rax)
               	movq	%rcx, (%rax)
               	movl	%edx, 0x8(%rax)
               	addq	$0x8, %rsp
               	popq	%rbx
               	popq	%r12
               	popq	%r13
               	popq	%rbp
               	retq
               	nop
               	movl	0x18(%rdi), %edx
               	cmpl	%ecx, %edx
               	jb	0x65dc9e <zend_hash_index_add_new+0xee>
               	movl	0x1c(%rdi), %ecx
               	movl	%ecx, %eax
               	shrl	$0x5, %eax
               	addl	%ecx, %eax
               	cmpl	%edx, %eax
               	jb	0x65dda8 <zend_hash_index_add_new+0x1f8>
               	callq	0x65bf80 <zend_hash_do_resize.part.0>
               	movl	0x18(%rbx), %edx
               	jmp	0x65dc9e <zend_hash_index_add_new+0xee>
               	nopw	%cs:(%rax,%rax)
               	movq	%rsi, %rdx
               	xorl	%eax, %eax
               	shlq	$0x4, %rdx
               	addq	0x10(%rdi), %rdx
               	cmpb	$0x0, 0x8(%rdx)
               	jne	0x65dc75 <zend_hash_index_add_new+0xc5>
               	movq	%rbx, %rdi
               	callq	0x65bc90 <zend_hash_packed_to_hash>
               	movl	0x18(%rbx), %edx
               	jmp	0x65dc9e <zend_hash_index_add_new+0xee>
               	nopw	(%rax,%rax)
               	testb	$-0x80, 0x4(%rdi)
               	jne	0x65ddd7 <zend_hash_index_add_new+0x227>
               	cmpl	$0x8, %ecx
               	jne	0x65dde6 <zend_hash_index_add_new+0x236>
               	callq	0x5be2c0 <_emalloc_160>
               	movl	0xc(%rbx), %edx
               	movl	0x18(%rbx), %ecx
               	movb	$0x14, 0x8(%rbx)
               	negl	%edx
               	leaq	(%rax,%rdx,4), %rdx
               	movq	%rdx, 0x10(%rbx)
               	movq	$-0x1, -0x8(%rdx)
               	jmp	0x65dbf3 <zend_hash_index_add_new+0x43>
               	nopl	(%rax)
               	callq	0x65b840 <zend_hash_rehash>
               	movl	0x18(%rbx), %edx
               	jmp	0x65dc9e <zend_hash_index_add_new+0xee>
               	nopl	(%rax)
               	movq	%r12, %rcx
               	shrq	%rcx
               	cmpq	%rsi, %rcx
               	jae	0x65ddcc <zend_hash_index_add_new+0x21c>
               	movl	%esi, %ecx
               	shrl	%ecx
               	cmpl	0x1c(%rdi), %ecx
               	jb	0x65ddf5 <zend_hash_index_add_new+0x245>
               	cmpl	%edx, %eax
               	jb	0x65dd57 <zend_hash_index_add_new+0x1a7>
               	addl	%edx, %edx
               	movl	%edx, 0x20(%rbx)
               	jmp	0x65dd57 <zend_hash_index_add_new+0x1a7>
               	shlq	$0x4, %rax
               	leaq	0x8(%rax), %rdi
               	callq	0x5ba390 <__zend_malloc>
               	jmp	0x65dd80 <zend_hash_index_add_new+0x1d0>
               	shlq	$0x4, %rax
               	leaq	0x8(%rax), %rdi
               	callq	0x5bf7a0 <_emalloc>
               	jmp	0x65dd80 <zend_hash_index_add_new+0x1d0>
               	callq	0x65ab30 <zend_hash_packed_grow>
               	movq	0x10(%rbx), %rdx
               	movl	0x18(%rbx), %ecx
               	jmp	0x65dbf3 <zend_hash_index_add_new+0x43>
               	nopw	%cs:(%rax,%rax)

After

<zend_hash_index_add_new>:
               	pushq	%rbp
               	movq	%rsp, %rbp
               	pushq	%r13
               	movq	%rdx, %r13
               	pushq	%r12
               	movq	%rsi, %r12
               	pushq	%rbx
               	movq	%rdi, %rbx
               	subq	$0x8, %rsp
               	movl	0x8(%rdi), %eax
               	testb	$0x4, %al
               	je	0x65dc60 <zend_hash_index_add_new+0xb0>
               	movl	0x18(%rdi), %ecx
               	movq	%rcx, %rax
               	cmpq	%rcx, %rsi
               	jae	0x65dcb0 <zend_hash_index_add_new+0x100>
               	movq	%rbx, %rdi
               	callq	0x65bc90 <zend_hash_packed_to_hash>
               	movl	0x18(%rbx), %edx
               	movl	0xc(%rbx), %ecx
               	movq	0x10(%rbx), %rsi
               	leal	0x1(%rdx), %eax
               	movl	%eax, 0x18(%rbx)
               	movl	%edx, %eax
               	orl	%r12d, %ecx
               	shlq	$0x5, %rax
               	movslq	%ecx, %rcx
               	addq	%rsi, %rax
               	leaq	(%rsi,%rcx,4), %rcx
               	movl	(%rcx), %esi
               	movl	%esi, 0xc(%rax)
               	movl	%edx, (%rcx)
               	cmpq	%r12, 0x28(%rbx)
               	jg	0x65dc32 <zend_hash_index_add_new+0x82>
               	movabsq	$0x7fffffffffffffff, %rdx # imm = 0x7FFFFFFFFFFFFFFF
               	cmpq	%rdx, %r12
               	setne	%dl
               	movzbl	%dl, %edx
               	addq	%r12, %rdx
               	movq	%rdx, 0x28(%rbx)
               	movq	(%r13), %rcx
               	movl	0x8(%r13), %edx
               	addl	$0x1, 0x1c(%rbx)
               	movq	%r12, 0x10(%rax)
               	movq	$0x0, 0x18(%rax)
               	movq	%rcx, (%rax)
               	movl	%edx, 0x8(%rax)
               	addq	$0x8, %rsp
               	popq	%rbx
               	popq	%r12
               	popq	%r13
               	popq	%rbp
               	retq
               	nopl	(%rax,%rax)
               	movl	0x20(%rdi), %ecx
               	testb	$0x8, %al
               	je	0x65dc80 <zend_hash_index_add_new+0xd0>
               	movl	%ecx, %eax
               	cmpq	%rax, %rsi
               	jb	0x65dd40 <zend_hash_index_add_new+0x190>
               	callq	0x65ad80 <zend_hash_real_init_mixed>
               	movl	0x18(%rbx), %edx
               	jmp	0x65dbeb <zend_hash_index_add_new+0x3b>
               	nop
               	movl	0x18(%rdi), %edx
               	cmpl	%ecx, %edx
               	jb	0x65dbeb <zend_hash_index_add_new+0x3b>
               	movl	0x1c(%rdi), %ecx
               	movl	%ecx, %eax
               	shrl	$0x5, %eax
               	addl	%ecx, %eax
               	cmpl	%edx, %eax
               	jb	0x65dd78 <zend_hash_index_add_new+0x1c8>
               	callq	0x65bf80 <zend_hash_do_resize.part.0>
               	movl	0x18(%rbx), %edx
               	jmp	0x65dbeb <zend_hash_index_add_new+0x3b>
               	nopw	(%rax,%rax)
               	movl	0x20(%rdi), %esi
               	movq	%rsi, %rdx
               	cmpq	%rsi, %r12
               	jae	0x65dd88 <zend_hash_index_add_new+0x1d8>
               	movq	0x10(%rdi), %rdx
               	movq	%r12, %rsi
               	shlq	$0x4, %rsi
               	leaq	(%rdx,%rsi), %rax
               	cmpq	%r12, %rcx
               	jae	0x65dd17 <zend_hash_index_add_new+0x167>
               	shlq	$0x4, %rcx
               	addq	%rcx, %rdx
               	cmpq	%rcx, %rsi
               	je	0x65dd17 <zend_hash_index_add_new+0x167>
               	movq	%rax, %rcx
               	subq	%rdx, %rcx
               	andl	$0x10, %ecx
               	je	0x65dd00 <zend_hash_index_add_new+0x150>
               	movl	$0x0, 0x8(%rdx)
               	addq	$0x10, %rdx
               	cmpq	%rdx, %rax
               	je	0x65dd17 <zend_hash_index_add_new+0x167>
               	nopw	(%rax,%rax)
               	movl	$0x0, 0x8(%rdx)
               	addq	$0x20, %rdx
               	movl	$0x0, -0x8(%rdx)
               	cmpq	%rdx, %rax
               	jne	0x65dd00 <zend_hash_index_add_new+0x150>
               	movq	(%r13), %rcx
               	movl	0x8(%r13), %edx
               	leal	0x1(%r12), %edi
               	addl	$0x1, 0x1c(%rbx)
               	movl	%edi, 0x18(%rbx)
               	movq	%rdi, 0x28(%rbx)
               	movq	%rcx, (%rax)
               	movl	%edx, 0x8(%rax)
               	addq	$0x8, %rsp
               	popq	%rbx
               	popq	%r12
               	popq	%r13
               	popq	%rbp
               	retq
               	testb	$-0x80, 0x4(%rdi)
               	jne	0x65ddae <zend_hash_index_add_new+0x1fe>
               	cmpl	$0x8, %ecx
               	jne	0x65ddbd <zend_hash_index_add_new+0x20d>
               	callq	0x5be2c0 <_emalloc_160>
               	movl	0xc(%rbx), %edx
               	movl	0x18(%rbx), %ecx
               	movb	$0x14, 0x8(%rbx)
               	negl	%edx
               	leaq	(%rax,%rdx,4), %rdx
               	movq	%rdx, 0x10(%rbx)
               	movq	$-0x1, -0x8(%rdx)
               	jmp	0x65dcc3 <zend_hash_index_add_new+0x113>
               	nopl	(%rax)
               	callq	0x65b840 <zend_hash_rehash>
               	movl	0x18(%rbx), %edx
               	jmp	0x65dbeb <zend_hash_index_add_new+0x3b>
               	nopl	(%rax)
               	movq	%r12, %rcx
               	shrq	%rcx
               	cmpq	%rsi, %rcx
               	jae	0x65dd9c <zend_hash_index_add_new+0x1ec>
               	movl	%esi, %ecx
               	shrl	%ecx
               	cmpl	0x1c(%rdi), %ecx
               	jb	0x65ddcc <zend_hash_index_add_new+0x21c>
               	cmpl	%edx, %eax
               	jb	0x65dbe0 <zend_hash_index_add_new+0x30>
               	addl	%edx, %edx
               	movl	%edx, 0x20(%rbx)
               	jmp	0x65dbe0 <zend_hash_index_add_new+0x30>
               	shlq	$0x4, %rax
               	leaq	0x8(%rax), %rdi
               	callq	0x5ba390 <__zend_malloc>
               	jmp	0x65dd50 <zend_hash_index_add_new+0x1a0>
               	shlq	$0x4, %rax
               	leaq	0x8(%rax), %rdi
               	callq	0x5bf7a0 <_emalloc>
               	jmp	0x65dd50 <zend_hash_index_add_new+0x1a0>
               	callq	0x65ab30 <zend_hash_packed_grow>
               	movq	0x10(%rbx), %rdx
               	movl	0x18(%rbx), %ecx
               	jmp	0x65dcc3 <zend_hash_index_add_new+0x113>
               	nopl	(%rax)

For add_new on a packed array there is no need to check the slot for UNDEF;
we can assume the index is undefined. This eliminates the offset calculation,
the slot lookup and the type check. The saving is, of course, dwarfed by the
cost of the conversion to a proper hash table.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant