Copyright (C) 1995-2013 FinalWire Ltd. All rights reserved. aida_bench32.dll build: 3.0.530.0 Mar 8 2013 11:09:18 M5A99X Arch:X32 CPUCount:8 NUMA:0 Freq: 4013.70MHz Priority:080 OS:6.1.7601 Service Pack 1 Memory: 2097024KB AllocGran:0x00010000 P:0x00001000 LP:0x00200000 Memory To Test:32768KB Small DTLB:512 Large DTLB:512 Huge DTLB:512 CLFlush:64 ProcMask: 0x000000ff Features: X86,TSC,X87,CMOV,MMX,SSE,SSE2,SSE3,MMXP,SSSE3,SSE4A,ABM,SSE4.1,SSE4.2,POPCNT,XOP,LAHF,CMPX8,AESNI,CLMUL,AVX,FMA3,FMA4,F16C,BMI,TBM,CLFLUSH,TSCINV,TOPEX,RDTSCP,3DNOWPREF,MISALIGNSSE,LNOP,PSE,4LTOP,PAGE1GB,FP128, CPU#000 Vendor: AuthenticAMD Family: 6f Model: 02 Stepping: 0 CoreType:0x10600f20 CPU#000 Type: "AMD FX(tm)-8350 Eight-Core Processor " CPU#000 AffMask: 0x00000001 CPU#000 PhysMask:0x000000ff CPU#000 APIC_ID:0x00000010 Phys_ID:000 Node_ID:00 CU_ID:00 Core_ID:00 CPU#000 L1I cache: 64KB, 64 byte cache line, 2 way, SMask:00000003 CPU#000 L1D cache: 16KB, 64 byte cache line, 4 way, SMask:00000001 CPU#000 L2 cache: 2048KB, 64 byte cache line, 16 way, SMask:00000003, inclusive CPU#000 L3 cache: 8192KB, 64 byte cache line, 64 way, SMask:000000ff, BDZL3 CPU#000 L1D 4K TLB: 64 entries, full, SMask:00000001 CPU#000 L1D 2M TLB: 64 entries, full, SMask:00000001 CPU#000 L1D 1G TLB: 64 entries, full, SMask:00000001 CPU#000 L2D 4K TLB: 1024 entries, 8 way, SMask:00000003 CPU#000 L2D 2M TLB: 1024 entries, 8 way, SMask:00000003 CPU#000 L2D 1G TLB: 1024 entries, 8 way, SMask:00000003 CPU#000 L1I 4K TLB: 48 entries, full, SMask:00000001 CPU#000 L1I 2M TLB: 24 entries, full, SMask:00000001 CPU#000 L1I 1G TLB: 24 entries, full, SMask:00000001 CPU#000 L2I 4K TLB: 512 entries, 4 way, SMask:00000003 CPU#000 L2I 2M TLB: 1024 entries, 8 way, SMask:00000003 CPU#000 L2I 1G TLB: 1024 entries, 8 way, SMask:00000003 CPU#001 Vendor: AuthenticAMD Family: 6f Model: 02 Stepping: 0 CoreType:0x10600f20 CPU#001 Type: "AMD FX(tm)-8350 Eight-Core Processor " CPU#001 AffMask: 0x00000002 CPU#001 PhysMask:0x000000ff CPU#001 
APIC_ID:0x00000011 Phys_ID:000 Node_ID:00 CU_ID:00 Core_ID:01 CPU#001 L1I cache: 64KB, 64 byte cache line, 2 way, SMask:00000003 CPU#001 L1D cache: 16KB, 64 byte cache line, 4 way, SMask:00000002 CPU#001 L2 cache: 2048KB, 64 byte cache line, 16 way, SMask:00000003, inclusive CPU#001 L3 cache: 8192KB, 64 byte cache line, 64 way, SMask:000000ff, BDZL3 CPU#001 L1D 4K TLB: 64 entries, full, SMask:00000002 CPU#001 L1D 2M TLB: 64 entries, full, SMask:00000002 CPU#001 L1D 1G TLB: 64 entries, full, SMask:00000002 CPU#001 L2D 4K TLB: 1024 entries, 8 way, SMask:00000003 CPU#001 L2D 2M TLB: 1024 entries, 8 way, SMask:00000003 CPU#001 L2D 1G TLB: 1024 entries, 8 way, SMask:00000003 CPU#001 L1I 4K TLB: 48 entries, full, SMask:00000002 CPU#001 L1I 2M TLB: 24 entries, full, SMask:00000002 CPU#001 L1I 1G TLB: 24 entries, full, SMask:00000002 CPU#001 L2I 4K TLB: 512 entries, 4 way, SMask:00000003 CPU#001 L2I 2M TLB: 1024 entries, 8 way, SMask:00000003 CPU#001 L2I 1G TLB: 1024 entries, 8 way, SMask:00000003 CPU#002 Vendor: AuthenticAMD Family: 6f Model: 02 Stepping: 0 CoreType:0x10600f20 CPU#002 Type: "AMD FX(tm)-8350 Eight-Core Processor " CPU#002 AffMask: 0x00000004 CPU#002 PhysMask:0x000000ff CPU#002 APIC_ID:0x00000012 Phys_ID:000 Node_ID:00 CU_ID:01 Core_ID:02 CPU#002 L1I cache: 64KB, 64 byte cache line, 2 way, SMask:0000000c CPU#002 L1D cache: 16KB, 64 byte cache line, 4 way, SMask:00000004 CPU#002 L2 cache: 2048KB, 64 byte cache line, 16 way, SMask:0000000c, inclusive CPU#002 L3 cache: 8192KB, 64 byte cache line, 64 way, SMask:000000ff, BDZL3 CPU#002 L1D 4K TLB: 64 entries, full, SMask:00000004 CPU#002 L1D 2M TLB: 64 entries, full, SMask:00000004 CPU#002 L1D 1G TLB: 64 entries, full, SMask:00000004 CPU#002 L2D 4K TLB: 1024 entries, 8 way, SMask:0000000c CPU#002 L2D 2M TLB: 1024 entries, 8 way, SMask:0000000c CPU#002 L2D 1G TLB: 1024 entries, 8 way, SMask:0000000c CPU#002 L1I 4K TLB: 48 entries, full, SMask:00000004 CPU#002 L1I 2M TLB: 24 entries, full, SMask:00000004 CPU#002 
L1I 1G TLB: 24 entries, full, SMask:00000004 CPU#002 L2I 4K TLB: 512 entries, 4 way, SMask:0000000c CPU#002 L2I 2M TLB: 1024 entries, 8 way, SMask:0000000c CPU#002 L2I 1G TLB: 1024 entries, 8 way, SMask:0000000c CPU#003 Vendor: AuthenticAMD Family: 6f Model: 02 Stepping: 0 CoreType:0x10600f20 CPU#003 Type: "AMD FX(tm)-8350 Eight-Core Processor " CPU#003 AffMask: 0x00000008 CPU#003 PhysMask:0x000000ff CPU#003 APIC_ID:0x00000013 Phys_ID:000 Node_ID:00 CU_ID:01 Core_ID:03 CPU#003 L1I cache: 64KB, 64 byte cache line, 2 way, SMask:0000000c CPU#003 L1D cache: 16KB, 64 byte cache line, 4 way, SMask:00000008 CPU#003 L2 cache: 2048KB, 64 byte cache line, 16 way, SMask:0000000c, inclusive CPU#003 L3 cache: 8192KB, 64 byte cache line, 64 way, SMask:000000ff, BDZL3 CPU#003 L1D 4K TLB: 64 entries, full, SMask:00000008 CPU#003 L1D 2M TLB: 64 entries, full, SMask:00000008 CPU#003 L1D 1G TLB: 64 entries, full, SMask:00000008 CPU#003 L2D 4K TLB: 1024 entries, 8 way, SMask:0000000c CPU#003 L2D 2M TLB: 1024 entries, 8 way, SMask:0000000c CPU#003 L2D 1G TLB: 1024 entries, 8 way, SMask:0000000c CPU#003 L1I 4K TLB: 48 entries, full, SMask:00000008 CPU#003 L1I 2M TLB: 24 entries, full, SMask:00000008 CPU#003 L1I 1G TLB: 24 entries, full, SMask:00000008 CPU#003 L2I 4K TLB: 512 entries, 4 way, SMask:0000000c CPU#003 L2I 2M TLB: 1024 entries, 8 way, SMask:0000000c CPU#003 L2I 1G TLB: 1024 entries, 8 way, SMask:0000000c CPU#004 Vendor: AuthenticAMD Family: 6f Model: 02 Stepping: 0 CoreType:0x10600f20 CPU#004 Type: "AMD FX(tm)-8350 Eight-Core Processor " CPU#004 AffMask: 0x00000010 CPU#004 PhysMask:0x000000ff CPU#004 APIC_ID:0x00000014 Phys_ID:000 Node_ID:00 CU_ID:02 Core_ID:04 CPU#004 L1I cache: 64KB, 64 byte cache line, 2 way, SMask:00000030 CPU#004 L1D cache: 16KB, 64 byte cache line, 4 way, SMask:00000010 CPU#004 L2 cache: 2048KB, 64 byte cache line, 16 way, SMask:00000030, inclusive CPU#004 L3 cache: 8192KB, 64 byte cache line, 64 way, SMask:000000ff, BDZL3 CPU#004 L1D 4K TLB: 64 
entries, full, SMask:00000010 CPU#004 L1D 2M TLB: 64 entries, full, SMask:00000010 CPU#004 L1D 1G TLB: 64 entries, full, SMask:00000010 CPU#004 L2D 4K TLB: 1024 entries, 8 way, SMask:00000030 CPU#004 L2D 2M TLB: 1024 entries, 8 way, SMask:00000030 CPU#004 L2D 1G TLB: 1024 entries, 8 way, SMask:00000030 CPU#004 L1I 4K TLB: 48 entries, full, SMask:00000010 CPU#004 L1I 2M TLB: 24 entries, full, SMask:00000010 CPU#004 L1I 1G TLB: 24 entries, full, SMask:00000010 CPU#004 L2I 4K TLB: 512 entries, 4 way, SMask:00000030 CPU#004 L2I 2M TLB: 1024 entries, 8 way, SMask:00000030 CPU#004 L2I 1G TLB: 1024 entries, 8 way, SMask:00000030 CPU#005 Vendor: AuthenticAMD Family: 6f Model: 02 Stepping: 0 CoreType:0x10600f20 CPU#005 Type: "AMD FX(tm)-8350 Eight-Core Processor " CPU#005 AffMask: 0x00000020 CPU#005 PhysMask:0x000000ff CPU#005 APIC_ID:0x00000015 Phys_ID:000 Node_ID:00 CU_ID:02 Core_ID:05 CPU#005 L1I cache: 64KB, 64 byte cache line, 2 way, SMask:00000030 CPU#005 L1D cache: 16KB, 64 byte cache line, 4 way, SMask:00000020 CPU#005 L2 cache: 2048KB, 64 byte cache line, 16 way, SMask:00000030, inclusive CPU#005 L3 cache: 8192KB, 64 byte cache line, 64 way, SMask:000000ff, BDZL3 CPU#005 L1D 4K TLB: 64 entries, full, SMask:00000020 CPU#005 L1D 2M TLB: 64 entries, full, SMask:00000020 CPU#005 L1D 1G TLB: 64 entries, full, SMask:00000020 CPU#005 L2D 4K TLB: 1024 entries, 8 way, SMask:00000030 CPU#005 L2D 2M TLB: 1024 entries, 8 way, SMask:00000030 CPU#005 L2D 1G TLB: 1024 entries, 8 way, SMask:00000030 CPU#005 L1I 4K TLB: 48 entries, full, SMask:00000020 CPU#005 L1I 2M TLB: 24 entries, full, SMask:00000020 CPU#005 L1I 1G TLB: 24 entries, full, SMask:00000020 CPU#005 L2I 4K TLB: 512 entries, 4 way, SMask:00000030 CPU#005 L2I 2M TLB: 1024 entries, 8 way, SMask:00000030 CPU#005 L2I 1G TLB: 1024 entries, 8 way, SMask:00000030 CPU#006 Vendor: AuthenticAMD Family: 6f Model: 02 Stepping: 0 CoreType:0x10600f20 CPU#006 Type: "AMD FX(tm)-8350 Eight-Core Processor " CPU#006 AffMask: 0x00000040 
CPU#006 PhysMask:0x000000ff CPU#006 APIC_ID:0x00000016 Phys_ID:000 Node_ID:00 CU_ID:03 Core_ID:06 CPU#006 L1I cache: 64KB, 64 byte cache line, 2 way, SMask:000000c0 CPU#006 L1D cache: 16KB, 64 byte cache line, 4 way, SMask:00000040 CPU#006 L2 cache: 2048KB, 64 byte cache line, 16 way, SMask:000000c0, inclusive CPU#006 L3 cache: 8192KB, 64 byte cache line, 64 way, SMask:000000ff, BDZL3 CPU#006 L1D 4K TLB: 64 entries, full, SMask:00000040 CPU#006 L1D 2M TLB: 64 entries, full, SMask:00000040 CPU#006 L1D 1G TLB: 64 entries, full, SMask:00000040 CPU#006 L2D 4K TLB: 1024 entries, 8 way, SMask:000000c0 CPU#006 L2D 2M TLB: 1024 entries, 8 way, SMask:000000c0 CPU#006 L2D 1G TLB: 1024 entries, 8 way, SMask:000000c0 CPU#006 L1I 4K TLB: 48 entries, full, SMask:00000040 CPU#006 L1I 2M TLB: 24 entries, full, SMask:00000040 CPU#006 L1I 1G TLB: 24 entries, full, SMask:00000040 CPU#006 L2I 4K TLB: 512 entries, 4 way, SMask:000000c0 CPU#006 L2I 2M TLB: 1024 entries, 8 way, SMask:000000c0 CPU#006 L2I 1G TLB: 1024 entries, 8 way, SMask:000000c0 CPU#007 Vendor: AuthenticAMD Family: 6f Model: 02 Stepping: 0 CoreType:0x10600f20 CPU#007 Type: "AMD FX(tm)-8350 Eight-Core Processor " CPU#007 AffMask: 0x00000080 CPU#007 PhysMask:0x000000ff CPU#007 APIC_ID:0x00000017 Phys_ID:000 Node_ID:00 CU_ID:03 Core_ID:07 CPU#007 L1I cache: 64KB, 64 byte cache line, 2 way, SMask:000000c0 CPU#007 L1D cache: 16KB, 64 byte cache line, 4 way, SMask:00000080 CPU#007 L2 cache: 2048KB, 64 byte cache line, 16 way, SMask:000000c0, inclusive CPU#007 L3 cache: 8192KB, 64 byte cache line, 64 way, SMask:000000ff, BDZL3 CPU#007 L1D 4K TLB: 64 entries, full, SMask:00000080 CPU#007 L1D 2M TLB: 64 entries, full, SMask:00000080 CPU#007 L1D 1G TLB: 64 entries, full, SMask:00000080 CPU#007 L2D 4K TLB: 1024 entries, 8 way, SMask:000000c0 CPU#007 L2D 2M TLB: 1024 entries, 8 way, SMask:000000c0 CPU#007 L2D 1G TLB: 1024 entries, 8 way, SMask:000000c0 CPU#007 L1I 4K TLB: 48 entries, full, SMask:00000080 CPU#007 L1I 2M TLB: 24 
entries, full, SMask:00000080 CPU#007 L1I 1G TLB: 24 entries, full, SMask:00000080 CPU#007 L2I 4K TLB: 512 entries, 4 way, SMask:000000c0 CPU#007 L2I 2M TLB: 1024 entries, 8 way, SMask:000000c0 CPU#007 L2I 1G TLB: 1024 entries, 8 way, SMask:000000c0 Parameters: "-p=high -b=IL -- Vishera_InstLat_x32.txt " Instruction Latency: Used CPUs: 1 ProcMask: 0x00000080 0 X86 :NOP L: [no true dep.] T: 0.06ns= 0.25c 1 X86 :0x66 NOP L: [no true dep.] T: 0.06ns= 0.25c 2 X86 : 2x 0x66 NOP L: [no true dep.] T: 0.06ns= 0.25c 3 X86 : 3x 0x66 NOP L: [no true dep.] T: 0.06ns= 0.25c 4 X86 : 4x 0x66 NOP L: [no true dep.] T: 3.55ns= 14.25c 5 X86 : 5x 0x66 NOP L: [no true dep.] T: 3.61ns= 14.50c 6 X86 : 6x 0x66 NOP L: [no true dep.] T: 3.67ns= 14.75c 7 X86 : 7x 0x66 NOP L: [no true dep.] T: 3.74ns= 15.00c 8 X86 : 8x 0x66 NOP L: [no true dep.] T: 5.29ns= 21.25c 9 X86 : 9x 0x66 NOP L: [no true dep.] T: 5.36ns= 21.50c 10 X86 :10x 0x66 NOP L: [no true dep.] T: 5.42ns= 21.75c 11 X86 :11x 0x66 NOP L: [no true dep.] T: 5.48ns= 22.00c 12 X86 :12x 0x66 NOP L: [no true dep.] T: 7.04ns= 28.25c 13 X86 :13x 0x66 NOP L: [no true dep.] T: 7.10ns= 28.50c 14 X86 :14x 0x66 NOP L: [no true dep.] T: 7.16ns= 28.75c 15 SSE2 :PAUSE L: [no true dep.] T: 1.25ns= 5.00c 16 X86 :MOV r8, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 17 X86 :MOV r16, imm16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 18 X86 :MOV r32, imm32 L: 0.12ns= 0.5c T: 0.12ns= 0.49c 20 X86 :MOV r8, r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 21 X86 :MOV r16, r16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 22 X86 :MOV r32, r32 L: 0.25ns= 1.0c T: 0.07ns= 0.27c 24 X86 :MOV r8, [m8] L: 1.25ns= 5.0c T: 0.13ns= 0.53c 25 X86 :MOV r16, [m16] L: 1.25ns= 5.0c T: 0.13ns= 0.53c 26 X86 :MOV r32, [m32] L: 1.00ns= 4.0c T: 0.12ns= 0.50c 28 X86 :MOV [m8], r8 L: [memory dep.] T: 0.25ns= 1.00c 29 X86 :MOV [m16], r16 L: [memory dep.] T: 0.25ns= 1.00c 30 X86 :MOV [m32], r32 L: [memory dep.] T: 0.25ns= 1.00c 31 X86 :MOV [m32 + 8], r32 L: [memory dep.] 
T: 0.25ns= 1.00c 34 X86 :MOV r8,[m8]+MOV [m8],r8 L: 1.25ns= 5.0c T: 0.22ns= 0.87c 35 X86 :MOV r16,[m16]+MOV [m16],r16 L: 8.95ns= 35.9c T: 10.78ns= 43.25c 36 X86 :MOV r32,[m32]+MOV [m32],r32 L: 8.70ns= 34.9c T: 11.17ns= 44.83c 38 SSE2 :MOVNTI [m32], r32 L: [memory dep.] T: 0.50ns= 2.00c 40 CMOV :CMOVNZ r16, r16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 41 CMOV :CMOVNZ r32, r32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 43 X86 :MOVSX r16, r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 44 X86 :MOVSX r32, r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 46 X86 :MOVSX r32, r16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 49 X86 :MOVZX r16, r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 50 X86 :MOVZX r32, r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 52 X86 :MOVZX r32, r16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 54 X86 :XCHG r8, r8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 55 X86 :XCHG r16, r16 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 56 X86 :XCHG r32, r32 L: 0.25ns= 1.0c T: 0.16ns= 0.63c 58 X86 :XCHG r1_8, r2_8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 59 X86 :XCHG r1_16, r2_16 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 60 X86 :XCHG r1_32, r2_32 L: 0.25ns= 1.0c T: 0.14ns= 0.55c 62 X86 :XCHG r8, [m8] L: 10.22ns= 41.0c T: 10.22ns= 41.00c 63 X86 :XCHG r16, [m16] L: 10.22ns= 41.0c T: 10.22ns= 41.00c 64 X86 :XCHG r32, [m32] L: 10.22ns= 41.0c T: 10.22ns= 41.00c 66 X86 :ADD r32, 0x04000 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 67 X86 :ADD r32, 0x08000 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 68 X86 :ADD r32, 0x10000 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 69 X86 :ADD r32, 0x20000 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 70 X86 :ADD r8, r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 71 X86 :ADD r16, r16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 72 X86 :ADD r32, r32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 74 X86 :ADD r8, [m8] L: 1.25ns= 5.0c T: 0.13ns= 0.53c 75 X86 :ADD r16, [m16] L: 1.25ns= 5.0c T: 0.13ns= 0.53c 76 X86 :ADD r32, [m32] L: 1.25ns= 5.0c T: 0.13ns= 0.53c 78 X86 :ADD [m8], r8 L: 1.99ns= 8.0c T: 0.25ns= 1.00c 79 X86 :ADD [m16], r16 L: 1.99ns= 8.0c T: 0.25ns= 1.00c 80 X86 :ADD [m32], r32 L: 1.99ns= 8.0c T: 0.25ns= 1.00c 81 X86 :ADD [m32 + 
8], r32 L: 1.99ns= 8.0c T: 0.25ns= 1.00c 84 X86 :LOCK ADD [m8], r8 L: 10.34ns= 41.5c T: 10.48ns= 42.08c 85 X86 :LOCK ADD [m16], r16 L: 10.34ns= 41.5c T: 10.48ns= 42.08c 86 X86 :LOCK ADD [m32], r32 L: 10.34ns= 41.5c T: 10.51ns= 42.17c 87 X86 :LOCK ADD [m32 + 8], r32 L: 10.34ns= 41.5c T: 10.51ns= 42.17c 90 X86 :ADD r8, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 91 X86 :ADD r16, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 92 X86 :ADD r32, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 94 X86 :ADD r16, imm16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 95 X86 :ADD r32, imm32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 97 X86 :ADD [m8], imm8 L: 1.99ns= 8.0c T: 0.25ns= 1.00c 98 X86 :ADD [m16], imm8 L: 1.99ns= 8.0c T: 0.25ns= 1.00c 99 X86 :ADD [m32], imm8 L: 1.99ns= 8.0c T: 0.25ns= 1.00c 101 X86 :ADD [m16], imm16 L: 1.99ns= 8.0c T: 0.25ns= 1.00c 102 X86 :ADD [m32], imm32 L: 1.99ns= 8.0c T: 0.25ns= 1.00c 104 X86 :ADD al, imm8 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 105 X86 :ADD ax, imm16 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 106 X86 :ADD eax, imm32 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 108 X86 :SUB r8, r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 109 X86 :SUB r16, r16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 110 X86 :SUB r32, r32 L: 0.12ns= 0.5c T: 0.12ns= 0.49c 112 X86 :SUB r1_8, r2_8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 113 X86 :SUB r1_16, r2_16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 114 X86 :SUB r1_32, r2_32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 116 X86 :ADC r8, r8 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 117 X86 :ADC r16, r16 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 118 X86 :ADC r32, r32 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 120 X86 :SBB r8, r8 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 121 X86 :SBB r16, r16 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 122 X86 :SBB r32, r32 L: 0.23ns= 0.9c T: 0.23ns= 0.92c 124 X86 :SBB r1_8, r2_8 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 125 X86 :SBB r1_16, r2_16 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 126 X86 :SBB r1_32, r2_32 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 128 X86 :CMP r8, r8 L: [no true dep.] T: 0.12ns= 0.50c 129 X86 :CMP r16, r16 L: [no true dep.] 
T: 0.13ns= 0.50c 130 X86 :CMP r32, r32 L: [no true dep.] T: 0.12ns= 0.50c 132 X86 :CMP r1_8, r2_8 L: [no true dep.] T: 0.12ns= 0.50c 133 X86 :CMP r1_16, r2_16 L: [no true dep.] T: 0.12ns= 0.50c 134 X86 :CMP r1_32, r2_32 L: [no true dep.] T: 0.12ns= 0.50c 136 X86 :AND r8, r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 137 X86 :AND r16, r16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 138 X86 :AND r32, r32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 140 X86 :AND r1_8, r2_8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 141 X86 :AND r1_16, r2_16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 142 X86 :AND r1_32, r2_32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 144 X86 :OR r8, r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 145 X86 :OR r16, r16 L: 0.25ns= 1.0c T: 0.13ns= 0.50c 146 X86 :OR r32, r32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 148 X86 :OR r1_8, r2_8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 149 X86 :OR r1_16, r2_16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 150 X86 :OR r1_32, r2_32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 152 X86 :XOR r8, r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 153 X86 :XOR r16, r16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 154 X86 :XOR r32, r32 L: 0.12ns= 0.5c T: 0.12ns= 0.50c 156 X86 :XOR r1_8, r2_8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 157 X86 :XOR r1_16, r2_16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 158 X86 :XOR r1_32, r2_32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 160 X86 :NEG r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 161 X86 :NEG r16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 162 X86 :NEG r32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 164 X86 :NOT r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 165 X86 :NOT r16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 166 X86 :NOT r32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 168 X86 :TEST r8, r8 L: [no true dep.] T: 0.12ns= 0.50c 169 X86 :TEST r16, r16 L: [no true dep.] T: 0.13ns= 0.50c 170 X86 :TEST r32, r32 L: [no true dep.] T: 0.12ns= 0.50c 172 X86 :TEST r1_8, r2_8 L: [no true dep.] T: 0.12ns= 0.50c 173 X86 :TEST r1_16, r2_16 L: [no true dep.] T: 0.12ns= 0.50c 174 X86 :TEST r1_32, r2_32 L: [no true dep.] T: 0.12ns= 0.50c 176 X86 :BT r16, r16 L: [no true dep.] 
T: 0.12ns= 0.50c 177 X86 :BT r32, r32 L: [no true dep.] T: 0.12ns= 0.50c 179 X86 :BT r16, imm8 L: [no true dep.] T: 0.12ns= 0.50c 180 X86 :BT r32, imm8 L: [no true dep.] T: 0.12ns= 0.50c 182 X86 :BTC r16, r16 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 183 X86 :BTC r32, r32 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 185 X86 :BTC r16, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 186 X86 :BTC r32, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 188 X86 :BTR r16, r16 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 189 X86 :BTR r32, r32 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 191 X86 :BTR r16, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 192 X86 :BTR r32, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 194 X86 :BTS r16, r16 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 195 X86 :BTS r32, r32 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 197 X86 :BTS r16, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 198 X86 :BTS r32, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 200 X86 :SETC r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 201 X86 :INC r8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 202 X86 :INC r16 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 203 X86 :INC r32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 205 X86 :LEA r16, [r16+r16] L: 0.50ns= 2.0c T: 0.25ns= 1.00c 206 X86 :LEA r32, [r32+r32] L: 0.25ns= 1.0c T: 0.12ns= 0.50c 208 X86 :LEA r16, [r+r+disp8] L: 0.75ns= 3.0c T: 0.25ns= 1.00c 209 X86 :LEA r32, [r+r+disp8] L: 0.50ns= 2.0c T: 0.12ns= 0.50c 211 X86 :LEA r16, [r+r*8] L: 0.75ns= 3.0c T: 0.25ns= 1.00c 212 X86 :LEA r32, [r+r*8] L: 0.50ns= 2.0c T: 0.13ns= 0.50c 214 X86 :LEA r16, [r+r*8+disp8] L: 0.75ns= 3.0c T: 0.25ns= 1.00c 215 X86 :LEA r32, [r+r*8+disp8] L: 0.50ns= 2.0c T: 0.12ns= 0.50c 217 X86 :SHL r8, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 218 X86 :SHL r16, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 219 X86 :SHL r32, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 221 X86 :SHL r8, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 222 X86 :SHL r16, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 223 X86 :SHL r32, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 225 X86 :SHL r8, cl L: 0.25ns= 1.0c T: 0.13ns= 0.51c 226 X86 :SHL r16, cl L: 0.25ns= 1.0c T: 0.12ns= 0.50c 227 X86 :SHL r32, 
cl L: 0.25ns= 1.0c T: 0.12ns= 0.50c 229 X86 :SHR r8, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 230 X86 :SHR r16, 1 L: 0.25ns= 1.0c T: 0.13ns= 0.50c 231 X86 :SHR r32, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 233 X86 :SHR r8, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 234 X86 :SHR r16, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 235 X86 :SHR r32, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 237 X86 :SHR r8, cl L: 0.25ns= 1.0c T: 0.13ns= 0.51c 238 X86 :SHR r16, cl L: 0.25ns= 1.0c T: 0.12ns= 0.50c 239 X86 :SHR r32, cl L: 0.25ns= 1.0c T: 0.12ns= 0.50c 241 X86 :SAR r8, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 242 X86 :SAR r16, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 243 X86 :SAR r32, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 245 X86 :SAR r8, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 246 X86 :SAR r16, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 247 X86 :SAR r32, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 249 X86 :SAR r8, cl L: 0.25ns= 1.0c T: 0.13ns= 0.51c 250 X86 :SAR r16, cl L: 0.25ns= 1.0c T: 0.12ns= 0.50c 251 X86 :SAR r32, cl L: 0.25ns= 1.0c T: 0.12ns= 0.50c 253 X86 :SHLD r16, r16, imm8 L: 1.00ns= 4.0c T: 0.75ns= 3.00c 254 X86 :SHLD r32, r32, imm8 L: 1.00ns= 4.0c T: 0.75ns= 3.00c 256 X86 :SHLD r16, r16, cl L: 1.00ns= 4.0c T: 0.75ns= 3.00c 257 X86 :SHLD r32, r32, cl L: 1.00ns= 4.0c T: 0.75ns= 3.00c 259 X86 :SHRD r16, r16, imm8 L: 1.00ns= 4.0c T: 0.75ns= 3.00c 260 X86 :SHRD r32, r32, imm8 L: 1.00ns= 4.0c T: 0.75ns= 3.00c 262 X86 :SHRD r16, r16, cl L: 1.00ns= 4.0c T: 0.75ns= 3.00c 263 X86 :SHRD r32, r32, cl L: 1.00ns= 4.0c T: 0.75ns= 3.00c 265 X86 :ROL r8, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 266 X86 :ROL r16, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 267 X86 :ROL r32, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 269 X86 :ROL r8, imm8 L: 0.25ns= 1.0c T: 0.13ns= 0.50c 270 X86 :ROL r16, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 271 X86 :ROL r32, imm8 L: 0.25ns= 1.0c T: 0.13ns= 0.50c 273 X86 :ROL r8, cl L: 0.25ns= 1.0c T: 0.13ns= 0.51c 274 X86 :ROL r16, cl L: 0.25ns= 1.0c T: 0.12ns= 0.50c 275 X86 :ROL r32, cl L: 0.25ns= 1.0c T: 0.12ns= 0.50c 277 X86 :ROR r8, 
1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 278 X86 :ROR r16, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 279 X86 :ROR r32, 1 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 281 X86 :ROR r8, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 282 X86 :ROR r16, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 283 X86 :ROR r32, imm8 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 285 X86 :ROR r8, cl L: 0.25ns= 1.0c T: 0.13ns= 0.51c 286 X86 :ROR r16, cl L: 0.25ns= 1.0c T: 0.12ns= 0.50c 287 X86 :ROR r32, cl L: 0.25ns= 1.0c T: 0.12ns= 0.50c 289 X86 :RCL r8, 1 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 290 X86 :RCL r16, 1 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 291 X86 :RCL r32, 1 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 293 X86 :RCL r8, imm8 L: 3.11ns= 12.5c T: 3.11ns= 12.50c 294 X86 :RCL r16, imm8 L: 2.62ns= 10.5c T: 2.62ns= 10.50c 295 X86 :RCL r32, imm8 L: 1.87ns= 7.5c T: 1.87ns= 7.50c 297 X86 :RCL r8, cl L: 2.99ns= 12.0c T: 2.99ns= 12.00c 298 X86 :RCL r16, cl L: 2.49ns= 10.0c T: 2.49ns= 10.00c 299 X86 :RCL r32, cl L: 1.76ns= 7.1c T: 1.74ns= 7.00c 301 X86 :RCR r8, 1 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 302 X86 :RCR r16, 1 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 303 X86 :RCR r32, 1 L: 0.25ns= 1.0c T: 0.25ns= 1.00c 305 X86 :RCR r8, imm8 L: 2.87ns= 11.5c T: 2.87ns= 11.50c 306 X86 :RCR r16, imm8 L: 2.37ns= 9.5c T: 2.37ns= 9.50c 307 X86 :RCR r32, imm8 L: 1.74ns= 7.0c T: 1.74ns= 7.00c 309 X86 :RCR r8, cl L: 2.74ns= 11.0c T: 2.74ns= 11.00c 310 X86 :RCR r16, cl L: 2.24ns= 9.0c T: 2.24ns= 9.00c 311 X86 :RCR r32, cl L: 1.64ns= 6.6c T: 1.62ns= 6.50c 313 X86 :BSF r16, r16 L: 0.75ns= 3.0c T: 0.75ns= 3.00c 314 X86 :BSF r32, r32 L: 0.75ns= 3.0c T: 0.75ns= 3.00c 316 X86 :BSR r16, r16 L: 1.00ns= 4.0c T: 1.00ns= 4.00c 317 X86 :BSR r32, r32 L: 1.00ns= 4.0c T: 1.00ns= 4.00c 319 X86 :BSWAP r32 L: 0.25ns= 1.0c T: 0.12ns= 0.50c 327 X86 :IMUL r16, r16 L: 1.00ns= 4.0c T: 0.52ns= 2.08c 328 X86 :IMUL r32, r32 L: 1.00ns= 4.0c T: 0.52ns= 2.08c 330 X86 :IMUL r16, r16, imm8 L: 1.25ns= 5.0c T: 0.48ns= 1.92c 331 X86 :IMUL r32, r32, imm8 L: 1.00ns= 4.0c T: 0.52ns= 2.08c 333 X86 :IMUL r16, r16, imm16 L: 
1.25ns= 5.0c T: 0.48ns= 1.92c 334 X86 :IMUL r32, r32, imm32 L: 1.00ns= 4.0c T: 0.52ns= 2.08c 336 X86 :IMUL r8 (ah) L: 1.00ns= 4.0c T: 1.00ns= 4.00c 337 X86 :IMUL r16 (dx) L: 1.49ns= 6.0c T: 1.00ns= 4.00c 338 X86 :IMUL r32 (edx) L: 1.25ns= 5.0c T: 1.00ns= 4.00c 340 X86 :MUL r8 (ah) L: 1.00ns= 4.0c T: 1.00ns= 4.00c 341 X86 :MUL r16 (dx) L: 1.49ns= 6.0c T: 1.00ns= 4.00c 342 X86 :MUL r32 (edx) L: 1.25ns= 5.0c T: 1.00ns= 4.00c 344 X86 :IMUL r8 (al) L: 1.00ns= 4.0c T: 1.00ns= 4.00c 345 X86 :IMUL r16 (ax) L: 1.00ns= 4.0c T: 1.00ns= 4.00c 346 X86 :IMUL r32 (eax) L: 1.00ns= 4.0c T: 1.00ns= 4.00c 348 X86 :MUL r8 (al) L: 1.00ns= 4.0c T: 1.00ns= 4.00c 349 X86 :MUL r16 (ax) L: 1.00ns= 4.0c T: 1.00ns= 4.00c 350 X86 :MUL r32 (eax) L: 1.00ns= 4.0c T: 1.00ns= 4.00c 352 X86 :IDIV r8 14/ 7b (full) L: 5.48ns= 22.0c T: 5.23ns= 21.00c 353 X86 :IDIV r8 12/ 7b ax upd L: 4.73ns= 19.0c T: 4.48ns= 18.00c 354 X86 :IDIV r8 7/ 7b ax upd L: 4.73ns= 19.0c T: 4.48ns= 18.00c 355 X86 :IDIV r8 4/ 7b ax upd L: [no true dep.] T: 4.48ns= 18.00c 356 X86 :IDIV r8 0/ 7b L: [no true dep.] T: 4.24ns= 17.00c 357 X86 :IDIV r8 11/ 4b ax upd L: 4.73ns= 19.0c T: 4.48ns= 18.00c 358 X86 :IDIV r8 8/ 4b ax upd L: [no true dep.] T: 4.48ns= 18.00c 359 X86 :IDIV r8 4/ 4b ax upd L: 4.73ns= 19.0c T: 4.48ns= 18.00c 360 X86 :IDIV r8 0/ 4b L: [no true dep.] T: 4.24ns= 17.00c 361 X86 :IDIV r8 2^12/2^6 ax upd L: [no true dep.] T: 4.48ns= 18.00c 362 X86 :IDIV r8 1/1 L: 4.48ns= 18.0c T: 4.24ns= 17.00c 363 X86 :IDIV r8 1/1 ax upd L: 4.73ns= 19.0c T: 4.48ns= 18.00c 364 X86 :IDIV r16 30/15b (full) L: 6.85ns= 27.5c T: 6.73ns= 27.00c 365 X86 :IDIV r16 24/15b ax upd L: 5.48ns= 22.0c T: 5.40ns= 21.67c 366 X86 :IDIV r16 15/15b ax upd L: 3.99ns= 16.0c T: 3.90ns= 15.67c 367 X86 :IDIV r16 8/15b ax/dx upd L: [no true dep.] T: 3.99ns= 16.00c 368 X86 :IDIV r16 0/15b L: [no true dep.] T: 3.72ns= 14.92c 369 X86 :IDIV r16 23/ 8b ax upd L: 6.98ns= 28.0c T: 6.89ns= 27.67c 370 X86 :IDIV r16 16/ 8b ax upd L: [no true dep.] 
T: 5.19ns= 20.83c 371 X86 :IDIV r16 8/ 8b ax upd L: 3.99ns= 16.0c T: 3.90ns= 15.67c 372 X86 :IDIV r16 0/ 8b L: [no true dep.] T: 3.72ns= 14.92c 373 X86 :IDIV r16 2^28/2^14 ax/dx L: [no true dep.] T: 6.58ns= 26.42c 374 X86 :IDIV r16 1/1 L: 3.88ns= 15.6c T: 3.72ns= 14.92c 375 X86 :IDIV r16 1/1 ax upd L: 3.99ns= 16.0c T: 3.90ns= 15.67c 376 X86 :IDIV r16 1/1 ax/dx upd L: 4.24ns= 17.0c T: 3.99ns= 16.00c 377 X86 :IDIV r32 62/31b (full) L: 10.46ns= 42.0c T: 10.46ns= 42.00c 378 X86 :IDIV r32 62/31b 0 rem. L: 10.46ns= 42.0c T: 10.46ns= 42.00c 379 X86 :IDIV r32 48/31b eax upd L: 6.98ns= 28.0c T: 6.98ns= 28.00c 380 X86 :IDIV r32 31/31b eax upd L: 3.49ns= 14.0c T: 3.49ns= 14.00c 381 X86 :IDIV r32 16/31b eax/edx L: [no true dep.] T: 3.49ns= 14.00c 382 X86 :IDIV r32 0/31b L: [no true dep.] T: 3.49ns= 14.00c 383 X86 :IDIV r32 47/16b eax upd L: 10.46ns= 42.0c T: 10.46ns= 42.00c 384 X86 :IDIV r32 32/16b eax upd L: [no true dep.] T: 6.73ns= 27.00c 385 X86 :IDIV r32 16/16b eax upd L: 3.49ns= 14.0c T: 3.49ns= 14.00c 386 X86 :IDIV r32 0/16b L: [no true dep.] T: 3.49ns= 14.00c 387 X86 :IDIV r32 2^60/2^30 eax/edx L: [no true dep.] T: 10.22ns= 41.00c 388 X86 :IDIV r32 1/1 L: 3.49ns= 14.0c T: 3.49ns= 14.00c 389 X86 :IDIV r32 1/1 eax upd L: 3.49ns= 14.0c T: 3.49ns= 14.00c 390 X86 :IDIV r32 1/1 eax/edx upd L: 3.49ns= 14.0c T: 3.49ns= 14.00c 405 X86 :DIV r8 16/ 8b (full) L: 5.48ns= 22.0c T: 5.23ns= 21.00c 406 X86 :DIV r8 12/ 8b ax upd L: 4.73ns= 19.0c T: 4.48ns= 18.00c 407 X86 :DIV r8 8/ 8b ax upd L: 4.73ns= 19.0c T: 4.48ns= 18.00c 408 X86 :DIV r8 4/ 8b ax upd L: [no true dep.] T: 4.48ns= 18.00c 409 X86 :DIV r8 0/ 8b L: [no true dep.] T: 4.24ns= 17.00c 410 X86 :DIV r8 12/ 4b ax upd L: 4.73ns= 19.0c T: 4.48ns= 18.00c 411 X86 :DIV r8 8/ 4b ax upd L: [no true dep.] T: 4.48ns= 18.00c 412 X86 :DIV r8 4/ 4b ax upd L: 4.73ns= 19.0c T: 4.48ns= 18.00c 413 X86 :DIV r8 0/ 4b L: [no true dep.] T: 4.24ns= 17.00c 414 X86 :DIV r8 2^14/2^7 ax upd L: [no true dep.] 
T: 4.48ns= 18.00c 415 X86 :DIV r8 1/1 L: 4.48ns= 18.0c T: 4.24ns= 17.00c 416 X86 :DIV r8 1/1 ax upd L: 4.73ns= 19.0c T: 4.48ns= 18.00c 417 X86 :DIV r16 32/16b (full) L: 6.85ns= 27.5c T: 6.73ns= 27.00c 418 X86 :DIV r16 30/15b 0 rem. L: 6.85ns= 27.5c T: 6.73ns= 27.00c 419 X86 :DIV r16 24/16b ax upd L: 5.27ns= 21.2c T: 5.19ns= 20.83c 420 X86 :DIV r16 16/16b ax upd L: 3.99ns= 16.0c T: 3.90ns= 15.67c 421 X86 :DIV r16 8/16b ax/dx upd L: [no true dep.] T: 3.99ns= 16.00c 422 X86 :DIV r16 0/16b L: [no true dep.] T: 3.72ns= 14.92c 423 X86 :DIV r16 24/ 8b ax upd L: 6.98ns= 28.0c T: 6.89ns= 27.67c 424 X86 :DIV r16 16/ 8b ax upd L: [no true dep.] T: 5.19ns= 20.83c 425 X86 :DIV r16 8/ 8b ax upd L: 3.99ns= 16.0c T: 3.90ns= 15.67c 426 X86 :DIV r16 0/ 8b L: [no true dep.] T: 3.72ns= 14.92c 427 X86 :DIV r16 1/1 L: 3.88ns= 15.6c T: 3.72ns= 14.92c 428 X86 :DIV r16 1/1 ax upd L: 3.99ns= 16.0c T: 3.90ns= 15.67c 429 X86 :DIV r16 1/1 ax/dx upd L: 4.24ns= 17.0c T: 3.99ns= 16.00c 430 X86 :DIV r32 64/32b (full) L: 10.46ns= 42.0c T: 10.46ns= 42.00c 431 X86 :DIV r32 62/31b 0 rem. L: 10.46ns= 42.0c T: 10.46ns= 42.00c 432 X86 :DIV r32 48/32b eax upd L: 6.73ns= 27.0c T: 6.73ns= 27.00c 433 X86 :DIV r32 32/32b eax upd L: 3.49ns= 14.0c T: 3.49ns= 14.00c 434 X86 :DIV r32 16/32b eax/edx L: [no true dep.] T: 3.49ns= 14.00c 435 X86 :DIV r32 0/32b L: [no true dep.] T: 3.49ns= 14.00c 436 X86 :DIV r32 48/16b eax upd L: 10.46ns= 42.0c T: 10.46ns= 42.00c 437 X86 :DIV r32 32/16b eax upd L: [no true dep.] T: 6.73ns= 27.00c 438 X86 :DIV r32 16/16b eax upd L: 3.49ns= 14.0c T: 3.49ns= 14.00c 439 X86 :DIV r32 0/16b L: [no true dep.] T: 3.49ns= 14.00c 440 X86 :DIV r32 2^62/2^31 eax/edx L: [no true dep.] 
T: 10.46ns= 42.00c 441 X86 :DIV r32 1/1 L: 3.49ns= 14.0c T: 3.49ns= 14.00c 442 X86 :DIV r32 1/1 eax upd L: 3.49ns= 14.0c T: 3.49ns= 14.00c 443 X86 :DIV r32 1/1 eax/edx upd L: 3.49ns= 14.0c T: 3.49ns= 14.00c 458 X86 :CBW L: 0.25ns= 1.0c T: 0.25ns= 1.00c 459 X86 :CWDE L: 0.25ns= 1.0c T: 0.25ns= 1.00c 461 X86 :CWD L: 0.27ns= 1.1c T: 0.27ns= 1.08c 462 X86 :CDQ L: 0.12ns= 0.5c T: 0.12ns= 0.49c 464 X86 :CLC L: 0.12ns= 0.5c T: 0.12ns= 0.50c 465 X86 :STC L: 0.12ns= 0.5c T: 0.12ns= 0.49c 466 X86 :CMC L: 0.23ns= 0.9c T: 0.23ns= 0.92c 467 X86 :CLD L: 0.75ns= 3.0c T: 0.75ns= 3.00c 468 X86 :STD L: 1.00ns= 4.0c T: 1.00ns= 4.00c 469 X86 :AAA L: 1.49ns= 6.0c T: 1.49ns= 6.00c 470 X86 :AAD L: 0.75ns= 3.0c T: 0.75ns= 3.00c 471 X86 :AAM L: 1.91ns= 7.7c T: 1.91ns= 7.67c 472 X86 :AAS L: 1.49ns= 6.0c T: 1.49ns= 6.00c 473 X86 :DAA L: 1.99ns= 8.0c T: 1.99ns= 8.00c 474 X86 :DAS L: 2.49ns= 10.0c T: 2.49ns= 10.00c 475 X86 :LAHF L: 1.99ns= 8.0c T: 1.99ns= 8.00c 476 X86 :SAHF L: 2.49ns= 10.0c T: 2.49ns= 10.00c 477 X86 :PUSHA L: [no true dep.] T: 2.64ns= 10.58c 478 X86 :POPA L: [no true dep.] T: 2.45ns= 9.83c 479 X86 :PUSHA + POPA L: 4.24ns= 17.0c T: 4.24ns= 17.00c 480 X86 :PUSHAD L: [no true dep.] T: 3.11ns= 12.50c 481 X86 :POPAD L: [no true dep.] T: 3.41ns= 13.67c 482 X86 :PUSHAD + POPAD L: 4.24ns= 17.0c T: 4.24ns= 17.00c 483 X86 :PUSH r16 L: [no true dep.] T: 0.25ns= 1.00c 484 X86 :POP r16 L: [no true dep.] T: 0.25ns= 1.00c 485 X86 :PUSH r16 + POP r16 L: 2.24ns= 9.0c T: 0.35ns= 1.42c 486 X86 :PUSH r32 L: [no true dep.] T: 0.25ns= 1.00c 487 X86 :POP r32 L: [no true dep.] T: 0.29ns= 1.17c 488 X86 :PUSH r32 + POP r32 L: 1.99ns= 8.0c T: 0.50ns= 2.00c 489 X86 :PUSH imm8 L: [no true dep.] T: 0.25ns= 1.00c 490 X86 :PUSH imm8 + POP r32 L: 0.64ns= 2.6c T: 0.52ns= 2.08c 491 X86 :PUSH imm32 L: [no true dep.] T: 0.25ns= 1.00c 492 X86 :PUSH imm32 + POP r32 L: 0.62ns= 2.5c T: 0.54ns= 2.17c 493 X86 :PUSH [m16] L: [no true dep.] T: 0.29ns= 1.17c 494 X86 :POP [m16] L: [no true dep.] 
T: 0.35ns= 1.42c 495 X86 :PUSH [m16] + POP [m16] L: 3.99ns= 16.0c T: 0.42ns= 1.67c 496 X86 :PUSH [m32] L: [no true dep.] T: 0.31ns= 1.25c 497 X86 :POP [m32] L: [no true dep.] T: 0.35ns= 1.42c 498 X86 :PUSH [m32] + POP [m32] L: 3.99ns= 16.0c T: 0.46ns= 1.83c 499 X86 :PUSHF L: [no true dep.] T: 1.00ns= 4.00c 501 X86 :PUSHF + POPF L: 6.83ns= 27.4c T: 6.85ns= 27.50c 502 X86 :PUSHFD L: [no true dep.] T: 1.00ns= 4.00c 503 X86 :POPFD L: [no true dep.] T: 4.61ns= 18.50c 504 X86 :PUSHFD + POPFD L: 6.83ns= 27.4c T: 6.85ns= 27.50c 505 X86 :CMPSB L: 0.75ns= 3.0c T: 0.75ns= 3.00c 506 X86 :CMPSW L: 0.75ns= 3.0c T: 0.77ns= 3.08c 507 X86 :CMPSD L: 0.79ns= 3.2c T: 0.77ns= 3.08c 509 X86 :REPE CMPSB BW in L1D: 0.49 B/c 1949MiB/s 510 X86 :REPE CMPSW BW in L1D: 0.97 B/c 3879MiB/s 511 X86 :REPE CMPSD BW in L1D: 1.92 B/c 7723MiB/s 513 X86 :LODSB L: 0.75ns= 3.0c T: 0.75ns= 3.00c 514 X86 :LODSW L: 0.75ns= 3.0c T: 0.75ns= 3.00c 515 X86 :LODSD L: 0.75ns= 3.0c T: 0.75ns= 3.00c 517 X86 :REP LODSB BW in L1D: 0.33 B/c 1307MiB/s 518 X86 :REP LODSW BW in L1D: 0.65 B/c 2612MiB/s 519 X86 :REP LODSD BW in L1D: 1.56 B/c 6259MiB/s 521 X86 :STOSB L: 0.75ns= 3.0c T: 0.75ns= 3.00c 522 X86 :STOSW L: 0.75ns= 3.0c T: 0.75ns= 3.00c 523 X86 :STOSD L: 0.75ns= 3.0c T: 0.75ns= 3.00c 525 X86 :REP STOSB BW in L1D: 5.59 B/c 22444MiB/s 526 X86 :REP STOSW BW in L1D: 5.58 B/c 22413MiB/s 527 X86 :REP STOSD BW in L1D: 5.58 B/c 22390MiB/s 529 X86 :MOVSB L: 0.75ns= 3.0c T: 0.75ns= 3.00c 530 X86 :MOVSW L: 0.75ns= 3.0c T: 0.75ns= 3.00c 531 X86 :MOVSD L: 0.75ns= 3.0c T: 0.75ns= 3.00c 533 X86 :REP MOVSB BW in L1D:10.18 B/c 40870MiB/s 534 X86 :REP MOVSW BW in L1D:10.36 B/c 41568MiB/s 535 X86 :REP MOVSD BW in L1D:10.35 B/c 41542MiB/s 537 X86 :SCASB L: 0.75ns= 3.0c T: 0.75ns= 3.00c 538 X86 :SCASW L: 0.75ns= 3.0c T: 0.75ns= 3.00c 539 X86 :SCASD L: 0.75ns= 3.0c T: 0.75ns= 3.00c 541 X86 :REPNE SCASB BW in L1D: 0.28 B/c 1113MiB/s 542 X86 :REPNE SCASW BW in L1D: 0.55 B/c 2221MiB/s 543 X86 :REPNE SCASD BW in L1D: 1.10 B/c 4429MiB/s 545 
X86 :XADD r8, r8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 546 X86 :XADD r16, r16 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 547 X86 :XADD r32, r32 L: 0.50ns= 2.0c T: 0.17ns= 0.66c 549 X86 :CMPXCHG r8, r8 L: 0.75ns= 3.0c T: 0.75ns= 3.00c 550 X86 :CMPXCHG r16, r16 L: 0.93ns= 3.8c T: 0.75ns= 3.00c 551 X86 :CMPXCHG r32, r32 L: 0.75ns= 3.0c T: 0.75ns= 3.00c 553 CMPX8 :CMPXCHG8B L: 6.56ns= 26.3c T: 6.56ns= 26.33c 555 X86 :RDTSC L: [no true dep.] T: 9.84ns= 39.50c 556 X86 :CPUID (EAX = 0) L: 27.41ns=110.0c T: 27.41ns=110.00c 557 X86 :CPUID (EAX = 1) L: 70.22ns=281.8c T: 70.22ns=281.83c 558 POPCNT:POPCNT r16, r16 L: 1.00ns= 4.0c T: 0.52ns= 2.08c 559 POPCNT:POPCNT r32, r32 L: 1.00ns= 4.0c T: 0.50ns= 2.00c 561 ABM :LZCNT r16, r16 L: 0.50ns= 2.0c T: 0.50ns= 2.00c 562 ABM :LZCNT r32, r32 L: 0.50ns= 2.0c T: 0.50ns= 2.00c 564 SSE4.2:CRC32 r32, r8 L: 0.75ns= 3.0c T: 0.50ns= 2.00c 565 SSE4.2:CRC32 r32, r16 L: 1.25ns= 5.0c T: 1.25ns= 5.00c 566 SSE4.2:CRC32 r32, r32 L: 1.49ns= 6.0c T: 1.49ns= 6.00c 569 X87 :FNOP L: [no true dep.] T: 0.06ns= 0.25c 570 X87 :FXCH st(i) L: 0.12ns= 0.5c T: 0.12ns= 0.49c 571 X87 :FCHS L: 0.50ns= 2.0c T: 0.50ns= 2.00c 572 X87 :FABS L: 0.50ns= 2.0c T: 0.50ns= 2.00c 573 X87 :FTST L: [no true dep.] T: 0.12ns= 0.50c 574 X87 :FXAM L: [no true dep.] 
T: 0.12ns= 0.50c 575 CMOV :FCMOVE st, st(i) L: 0.75ns= 3.0c T: 0.75ns= 3.00c 576 X87 :FADD st(i), st (st = 0.0) L: 1.25ns= 5.0c T: 0.18ns= 0.71c 577 X87 :FADD st(i), st L: 1.25ns= 5.0c T: 0.18ns= 0.71c 578 X87 :FADD st, st(i), FXCH st(i) L: 1.25ns= 5.0c T: 0.25ns= 1.00c 579 X87 :FMUL st(i), st (st = 0.0) L: 1.25ns= 5.0c T: 0.18ns= 0.71c 580 X87 :FMUL st(i), st L: 1.25ns= 5.0c T: 0.18ns= 0.71c 581 X87 :FMUL st, st(i), FXCH st(i) L: 1.25ns= 5.0c T: 0.25ns= 1.00c 582 X87 :FMUL + FADD st, st(i) L: 2.49ns= 10.0c T: [not enough reg] 583 X87 :FMUL st(2i) FADD st(2i+1) L: 1.25ns= 5.0c T: [not enough reg] 584 X87 :FDIV32 st(i), st L: 5.48ns= 22.0c T: 1.62ns= 6.50c 585 X87 :FDIV64 st(i), st L: 6.73ns= 27.0c T: 2.24ns= 9.00c 586 X87 :FDIV80 st(i), st L: 10.46ns= 42.0c T: 4.11ns= 16.50c 587 X87 :FDIV80 (0.0l/x) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 588 X87 :FDIV80 (x/1.0l) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 589 X87 :FDIV80 (x/2.0l) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 590 X87 :FDIV80 (x/0.5l) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 591 X87 :FSQRT32 st L: 6.73ns= 27.0c T: 2.10ns= 8.42c 592 X87 :FSQRT64 st L: 9.47ns= 38.0c T: 3.47ns= 13.92c 593 X87 :FSQRT80 st L: 12.96ns= 52.0c T: 5.21ns= 20.92c 594 X87 :FSQRT80 (0.0l) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 595 X87 :FSQRT80 (1.0l) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 596 X87 :FDECSTP L: [no true dep.] T: 0.06ns= 0.25c 597 X87 :FINCSTP L: [no true dep.] T: 0.06ns= 0.25c 598 X87 :FCOM st(i) L: [no true dep.] T: 0.12ns= 0.50c 599 CMOV :FCOMI st, st(i) L: [no true dep.] 
T: 0.25ns= 1.00c 600 X87 :FSIN80 (0.0) L: 15.70ns= 63.0c T: 15.72ns= 63.08c 601 X87 :FSIN80 (0.0) + FADD L: 17.40ns= 69.8c T: 15.72ns= 63.08c 602 X87 :FSIN80 (1.0) + FADD L: 40.36ns=162.0c T: 28.67ns=115.08c 603 X87 :FSIN80 (4Pi) + FADD L: 48.11ns=193.1c T: 33.41ns=134.08c 604 X87 :FSIN80 (2Pi) + FADD L: 48.09ns=193.0c T: 33.43ns=134.17c 605 X87 :FSIN80 (Pi) + FADD L: 47.84ns=192.0c T: 33.18ns=133.17c 606 X87 :FSIN80 (Pi/2) + FADD L: 47.84ns=192.0c T: 33.16ns=133.08c 607 X87 :FSIN80 (Pi/4) + FADD L: 40.11ns=161.0c T: 28.57ns=114.67c 608 X87 :FSIN80 (Pi/8) + FADD L: 38.87ns=156.0c T: 27.36ns=109.83c 609 X87 :FSIN80 (Pi/16) + FADD L: 38.87ns=156.0c T: 29.69ns=119.17c 610 X87 :FSIN80 (Pi/32) + FADD L: 38.87ns=156.0c T: 29.67ns=119.08c 611 X87 :FCOS80 (0.73908513...) L: 38.87ns=156.0c T: 29.17ns=117.08c 612 X87 :FCOS80 (0.73908513...)+FADD L: 39.86ns=160.0c T: 29.94ns=120.17c 613 X87 :FCOS80 (0.0) + FADD L: 17.65ns= 70.8c T: 15.97ns= 64.08c 614 X87 :FCOS80 (1.0) + FADD L: 40.61ns=163.0c T: 29.75ns=119.42c 615 X87 :FCOS80 (4Pi) + FADD L: 48.33ns=194.0c T: 33.43ns=134.17c 616 X87 :FCOS80 (2Pi) + FADD L: 48.33ns=194.0c T: 33.43ns=134.17c 617 X87 :FCOS80 (Pi) + FADD L: 48.58ns=195.0c T: 33.68ns=135.17c 618 X87 :FCOS80 (Pi/2) + FADD L: 48.09ns=193.0c T: 33.16ns=133.08c 619 X87 :FCOS80 (Pi/4) + FADD L: 40.36ns=162.0c T: 30.04ns=120.58c 620 X87 :FCOS80 (Pi/8) + FADD L: 39.12ns=157.0c T: 28.20ns=113.17c 621 X87 :FCOS80 (Pi/16) + FADD L: 39.12ns=157.0c T: 28.20ns=113.17c 622 X87 :FCOS80 (Pi/32) + FADD L: 39.12ns=157.0c T: 28.20ns=113.17c 623 MMX :EMMS L: 0.06ns= 0.2c T: 0.06ns= 0.24c 624 MMX :MOVD r32, mm L: [diff. reg. set] T: 0.25ns= 1.00c 625 MMX :MOVD mm, r32 L: [diff. reg. set] T: 0.25ns= 1.00c 626 MMX :MOVD r32, mm+MOVD mm, r32 L: 4.48ns= 18.0c T: 0.09ns= 0.35c 630 MMX :MOVD mm, [m32] L: [memory dep.] T: 0.12ns= 0.50c 631 MMX :MOVD [m32], mm L: [memory dep.] 
T: 0.25ns= 1.00c 632 MMX :MOVD mm,[m32]+MOVD [m32],mm L: 2.74ns= 11.0c T: 0.20ns= 0.80c 633 MMX :MOVQ mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 634 MMX :MOVQ mm, [m64] L: [memory dep.] T: 0.12ns= 0.50c 635 MMX :MOVQ [m64], mm L: [memory dep.] T: 0.25ns= 1.00c 636 MMX :MOVQ mm,[m64]+MOVQ [m64],mm L: 2.74ns= 11.0c T: 0.16ns= 0.62c 637 MMXP :MOVNTQ [m64], mm L: [memory dep.] T: 2.00ns= 2.00c 638 MMXP :PMOVMSKB r32, mm L: [diff. reg. set] T: 0.25ns= 1.00c 640 MMXP :MASKMOVQ mm, mm L: [memory dep.] T: 67.00ns= 67.00c 641 MMX :PADDB mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 642 MMX :PADDW mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 643 MMX :PADDD mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 644 SSE2 :PADDQ mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 645 MMX :PADDSB mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 646 MMX :PADDSW mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 647 MMX :PADDUSB mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 648 MMX :PADDUSW mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 649 MMX :PSUBB mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 650 MMX :PSUBB mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 651 MMX :PSUBW mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 652 MMX :PSUBW mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 653 MMX :PSUBD mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 654 MMX :PSUBD mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 655 SSE2 :PSUBQ mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 656 SSE2 :PSUBQ mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 657 MMX :PSUBSB mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 658 MMX :PSUBSB mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 659 MMX :PSUBSW mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 660 MMX :PSUBSW mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 661 MMX :PSUBUSB mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 662 MMX :PSUBUSB mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 663 MMX :PSUBUSW mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 664 MMX :PSUBUSW mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 665 MMX :PCMPEQB mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 666 MMX :PCMPEQB mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 667 MMX 
:PCMPEQW mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 668 MMX :PCMPEQW mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 669 MMX :PCMPEQD mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.50c 670 MMX :PCMPEQD mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 671 MMX :PCMPGTB mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 672 MMX :PCMPGTB mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 673 MMX :PCMPGTW mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 674 MMX :PCMPGTW mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 675 MMX :PCMPGTD mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 676 MMX :PCMPGTD mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 677 MMX :PAND mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 678 MMX :PAND mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 679 MMX :PANDN mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 680 MMX :PANDN mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 681 MMX :POR mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 682 MMX :POR mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 683 MMX :PXOR mm, mm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 684 MMX :PXOR mm1, mm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 685 MMX :PMULHW mm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 686 MMXP :PMULHUW mm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 688 SSSE3 :PMULHRSW mm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 689 MMX :PMULLW mm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 690 SSE2 :PMULUDQ mm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 691 SSSE3 :PMADDUBSW mm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 692 MMX :PMADDWD mm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 693 MMX :PSLLW mm, mm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 694 MMX :PSLLW mm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 695 MMX :PSLLD mm, mm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 696 MMX :PSLLD mm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 697 MMX :PSLLQ mm, mm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 698 MMX :PSLLQ mm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 699 MMX :PSRAW mm, mm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 700 MMX :PSRAW mm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 701 MMX :PSRAD mm, mm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 702 MMX :PSRAD mm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 703 MMX :PSRLW mm, mm 
L: 0.75ns= 3.0c T: 0.25ns= 1.00c 704 MMX :PSRLW mm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 705 MMX :PSRLD mm, mm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 706 MMX :PSRLD mm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 707 MMX :PSRLQ mm, mm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 708 MMX :PSRLQ mm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 709 MMX :PUNPCKHBW mm, mm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 710 MMX :PUNPCKHWD mm, mm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 711 MMX :PUNPCKHDQ mm, mm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 712 MMX :PUNPCKLBW mm, mm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 713 MMX :PUNPCKLWD mm, mm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 714 MMX :PUNPCKLDQ mm, mm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 715 MMX :PACKSSWB mm, mm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 716 MMX :PACKUSWB mm, mm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 717 MMX :PACKSSDW mm, mm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 753 MMXP :PAVGB mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 754 MMXP :PAVGW mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 755 MMXP :PEXTRW r32, mm, im8 L: [diff. reg. set] T: 0.25ns= 1.00c 756 MMXP :PINSRW mm, r32, im8 L: [diff. reg. set] T: 0.25ns= 1.00c 757 MMXP :PEXTRW + PINSRW r32 L: 0.50ns= 2.0c T: 0.50ns= 2.00c 761 MMXP :PMAXSW mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 762 MMXP :PMAXUB mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 763 MMXP :PMINSW mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 764 MMXP :PMINUB mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 765 MMXP :PSADBW mm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 766 MMXP :PSHUFW mm, mm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 767 MMXP :PREFETCHNTA [mem] L: [memory dep.] T: 0.12ns= 0.50c 768 MMXP :PREFETCHT0 [mem] L: [memory dep.] T: 0.12ns= 0.50c 769 MMXP :PREFETCHT1 [mem] L: [memory dep.] T: 0.12ns= 0.50c 770 MMXP :PREFETCHT2 [mem] L: [memory dep.] 
T: 0.12ns= 0.50c 771 MMXP :SFENCE L: 21.20ns= 85.1c T: 21.20ns= 85.08c 772 SSE2 :LFENCE L: 0.06ns= 0.2c T: 0.06ns= 0.24c 773 SSE2 :MFENCE L: 21.20ns= 85.1c T: 21.20ns= 85.08c 774 SSSE3 :PABSB mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 775 SSSE3 :PABSW mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 776 SSSE3 :PABSD mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 777 SSSE3 :PALIGNR mm, mm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 778 SSSE3 :PHADDW mm, mm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 779 SSSE3 :PHADDD mm, mm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 780 SSSE3 :PHADDSW mm, mm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 781 SSSE3 :PHSUBW mm, mm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 782 SSSE3 :PHSUBD mm, mm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 783 SSSE3 :PHSUBSW mm, mm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 784 SSSE3 :PSHUFB mm, mm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 785 SSSE3 :PSIGNB mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 786 SSSE3 :PSIGNW mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 787 SSSE3 :PSIGND mm, mm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 788 SSE :MOVHLPS xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 789 SSE :MOVHLPS xmm1, xmm2 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 790 AVX :VMOVHLPS xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 791 AVX :VMOVHLPS xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 792 SSE :MOVSS xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 793 AVX :VMOVSS xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 794 SSE :MOVSS xmm, [m32] L: [memory dep.] T: 0.12ns= 0.50c 795 SSE :MOVSS [m32], xmm L: [memory dep.] T: 0.25ns= 1.00c 796 SSE :MOVSS LS pair L: 2.74ns= 11.0c T: 0.20ns= 0.80c 797 AVX :VMOVSS xmm, [m32] L: [memory dep.] T: 0.12ns= 0.50c 798 AVX :VMOVSS [m32], xmm L: [memory dep.] T: 0.25ns= 1.00c 799 AVX :VMOVSS LS pair L: 2.74ns= 11.0c T: 0.12ns= 0.46c 800 SSE :MOVLPS xmm, [m32] L: [memory dep.] T: 0.12ns= 0.50c 801 SSE :MOVLPS [m32], xmm L: [memory dep.] T: 0.25ns= 1.00c 802 SSE :MOVLPS LS pair L: 3.24ns= 13.0c T: 0.25ns= 1.00c 803 AVX :VMOVLPS xmm, xmm, [m32] L: [memory dep.] 
T: 0.12ns= 0.50c 804 AVX :VMOVLPS [m32], xmm L: [memory dep.] T: 0.25ns= 1.00c 805 AVX :VMOVLPS LS pair L: 3.24ns= 13.0c T: 0.25ns= 1.00c 806 SSE :MOVHPS xmm, [m32] L: [memory dep.] T: 0.25ns= 1.00c 807 SSE :MOVHPS [m32], xmm L: [memory dep.] T: 0.25ns= 1.00c 808 SSE :MOVHPS LS pair L: 3.74ns= 15.0c T: 0.25ns= 1.00c 809 AVX :VMOVHPS xmm, xmm, [m32] L: [memory dep.] T: 0.25ns= 1.00c 810 AVX :VMOVHPS [m32], xmm L: [memory dep.] T: 0.25ns= 1.00c 811 AVX :VMOVHPS LS pair L: 3.74ns= 15.0c T: 0.25ns= 1.00c 812 SSE :MOVAPS xmm, xmm L: 0.06ns= 0.2c T: 0.06ns= 0.24c 813 SSE :MOVAPS xmm, [m128] L: [memory dep.] T: 0.12ns= 0.50c 814 SSE :MOVAPS [m128], xmm L: [memory dep.] T: 0.25ns= 1.00c 815 SSE :MOVAPS LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.63c 816 AVX :VMOVAPS xmm, xmm L: 0.06ns= 0.2c T: 0.06ns= 0.24c 817 AVX :VMOVAPS xmm, [m128] L: [memory dep.] T: 0.12ns= 0.50c 818 AVX :VMOVAPS [m128], xmm L: [memory dep.] T: 0.25ns= 1.00c 819 AVX :VMOVAPS LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.63c 820 SSE :MOVUPS xmm, xmm L: 0.06ns= 0.2c T: 0.06ns= 0.24c 821 SSE :MOVUPS xmm, [m128] L: [memory dep.] T: 0.12ns= 0.50c 822 SSE :MOVUPS [m128], xmm L: [memory dep.] T: 0.25ns= 1.00c 823 SSE :MOVUPS aligned LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.63c 824 SSE :MOVUPS xmm, [m128 + 4] L: [memory dep.] T: 1.00ns= 4.00c 825 SSE :MOVUPS [m128 + 4], xmm L: [memory dep.] T: 1.02ns= 4.08c 826 SSE :MOVUPS unaligned LS pair L: 4.09ns= 16.4c T: 0.81ns= 3.25c 827 AVX :VMOVUPS xmm, xmm L: 0.06ns= 0.2c T: 0.06ns= 0.24c 828 AVX :VMOVUPS xmm, [m128] L: [memory dep.] T: 0.12ns= 0.50c 829 AVX :VMOVUPS [m128], xmm L: [memory dep.] T: 0.25ns= 1.00c 830 AVX :VMOVUPS aligned LS pair L: 2.74ns= 11.0c T: 0.15ns= 0.59c 831 AVX :VMOVUPS xmm, [m128 + 4] L: [memory dep.] T: 0.23ns= 0.92c 832 AVX :VMOVUPS [m128 + 4], xmm L: [memory dep.] T: 0.50ns= 2.00c 833 AVX :VMOVUPS unaligned LS pair L: 4.11ns= 16.5c T: 0.33ns= 1.33c 834 SSE4A :MOVNTSS [m32], xmm L: [memory dep.] 
T: 2.00ns= 2.00c 835 SSE :MOVNTPS [m128], xmm L: [memory dep.] T: 2.00ns= 2.00c 836 AVX :VMOVNTPS [m128], xmm L: [memory dep.] T: 2.00ns= 2.00c 837 SSE :MOVMSKPS r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 838 AVX :VMOVMSKPS r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 839 AVX :VMASKMOVPS xmm,xmm,[m128+4] L: [memory dep.] T: 0.39ns= 1.58c 840 AVX :VMASKMOVPS [m128+4],xmm,xmm L: [memory dep.] T: 23.44ns= 94.08c 841 AVX :VMASKMOVPS unaligned LSpair L: 28.40ns=114.0c T: 23.61ns= 94.75c 842 SSE :UNPCKLPS xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 843 AVX :VUNPCKLPS xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 844 SSE :UNPCKHPS xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 845 AVX :VUNPCKHPS xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 846 SSE :SHUFPS xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 847 AVX :VSHUFPS xmm, xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 848 AVX :VPERMILPS xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 849 AVX :VPERMILPS xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 850 SSE :COMISS xmm, xmm L: [no true dep.] T: 0.25ns= 1.00c 851 AVX :VCOMISS xmm, xmm L: [no true dep.] T: 0.25ns= 1.00c 852 SSE :UCOMISS xmm, xmm L: [no true dep.] T: 0.25ns= 1.00c 853 AVX :VUCOMISS xmm, xmm L: [no true dep.] 
T: 0.25ns= 1.00c 854 SSE :CMPSS xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 855 SSE :CMPPS xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 856 AVX :VCMPSS xmm, xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 857 AVX :VCMPPS xmm, xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 858 SSE :SUBSS xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 859 AVX :VSUBSS xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 860 SSE :SUBPS xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 861 AVX :VSUBPS xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 862 SSE :ADDSS xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 863 AVX :VADDSS xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 864 SSE :ADDPS xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 865 AVX :VADDPS xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 866 SSE :MULSS xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 867 AVX :VMULSS xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 868 SSE :MULPS xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 869 AVX :VMULPS xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 870 SSE :MULSS+ADDSS xmm, xmm L: 2.49ns= 10.0c T: [not enough reg] 871 AVX :VMULSS+VADDSS xmm, xmm, xmm L: 2.49ns= 10.0c T: [not enough reg] 872 SSE :MULPS+ADDPS xmm, xmm L: 2.49ns= 10.0c T: [not enough reg] 873 AVX :VMULPS+VADDPS xmm, xmm, xmm L: 2.49ns= 10.0c T: [not enough reg] 874 SSE :MULSS xm1,xm1 ADDSS xm2,xm2 L: 1.25ns= 5.0c T: [not enough reg] 875 AVX :VMULSS xmm1.. VADDSS xmm2.. L: 1.25ns= 5.0c T: [not enough reg] 876 SSE :MULPS xm1,xm1 ADDPS xm2,xm2 L: 1.25ns= 5.0c T: [not enough reg] 877 AVX :VMULPS xmm1.. VADDPS xmm2.. 
L: 1.25ns= 5.0c T: [not enough reg] 878 SSE :MAXSS xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 879 AVX :VMAXSS xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 880 SSE :MAXPS xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 881 AVX :VMAXPS xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 882 SSE :MINSS xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 883 AVX :VMINSS xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 884 SSE :MINPS xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 885 AVX :VMINPS xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 886 SSE :ANDNPS xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 887 SSE :ANDNPS xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 888 AVX :VANDNPS xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 889 AVX :VANDNPS xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 890 SSE :ANDPS xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 891 SSE :ANDPS xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 892 AVX :VANDPS xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 893 AVX :VANDPS xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 894 SSE :ORPS xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 895 SSE :ORPS xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 896 AVX :VORPS xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 897 AVX :VORPS xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 898 SSE :XORPS xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 899 SSE :XORPS xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 900 AVX :VXORPS xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 901 AVX :VXORPS xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 902 SSE :DIVSS xmm, xmm L: 5.98ns= 24.0c T: 2.37ns= 9.50c 903 SSE :DIVSS (0.0f/x) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 904 SSE :DIVSS (x/1.0f) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 905 SSE :DIVSS (x/2.0f) L: 1.97ns= 7.9c T: 1.12ns= 4.50c 906 SSE :DIVSS (x/0.5f) L: 1.97ns= 7.9c T: 1.12ns= 4.50c 907 AVX :VDIVSS xmm, xmm, xmm L: 5.98ns= 24.0c T: 2.10ns= 8.42c 908 AVX :VDIVSS (0.0f/x) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 909 AVX :VDIVSS (x/1.0f) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 910 AVX :VDIVSS (x/2.0f) L: 2.22ns= 
8.9c T: 1.12ns= 4.50c 911 AVX :VDIVSS (x/0.5f) L: 2.22ns= 8.9c T: 1.12ns= 4.50c 912 SSE :DIVPS xmm, xmm L: 5.98ns= 24.0c T: 2.37ns= 9.50c 913 SSE :DIVPS (0.0f/x) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 914 SSE :DIVPS (x/1.0f) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 915 SSE :DIVPS (x/2.0f) L: 1.79ns= 7.2c T: 1.12ns= 4.50c 916 SSE :DIVPS (x/0.5f) L: 1.79ns= 7.2c T: 1.12ns= 4.50c 917 AVX :VDIVPS xmm, xmm, xmm L: 5.98ns= 24.0c T: 2.10ns= 8.42c 918 AVX :VDIVPS (0.0f/x) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 919 AVX :VDIVPS (x/1.0f) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 920 AVX :VDIVPS (x/2.0f) L: 1.81ns= 7.3c T: 1.12ns= 4.50c 921 AVX :VDIVPS (x/0.5f) L: 1.81ns= 7.3c T: 1.12ns= 4.50c 922 SSE :SQRTSS xmm, xmm L: 50.29ns=201.8c T: 45.20ns=181.42c 923 SSE :SQRTSS (0.0f) L: 2.24ns= 9.0c T: 1.10ns= 4.42c 924 SSE :SQRTSS (1.0f) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 925 AVX :VSQRTSS xmm, xmm L: 46.22ns=185.5c T: 45.41ns=182.25c 926 AVX :VSQRTSS (0.0f) L: 2.24ns= 9.0c T: 1.33ns= 5.33c 927 AVX :VSQRTSS (1.0f) L: 2.24ns= 9.0c T: 1.33ns= 5.33c 928 SSE :SQRTPS xmm, xmm L: 7.23ns= 29.0c T: 2.78ns= 11.17c 929 SSE :SQRTPS (0.0f) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 930 SSE :SQRTPS (1.0f) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 931 AVX :VSQRTPS xmm, xmm L: 7.23ns= 29.0c T: 2.99ns= 12.00c 932 AVX :VSQRTPS (0.0f) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 933 AVX :VSQRTPS (1.0f) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 934 SSE :RCPSS xmm, xmm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 935 AVX :VRCPSS xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 936 SSE :RCPPS xmm, xmm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 937 AVX :VRCPPS xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 938 SSE :RSQRTSS xmm, xmm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 939 AVX :VRSQRTSS xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 940 SSE :RSQRTPS xmm, xmm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 941 AVX :VRSQRTPS xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 942 SSE :CVTPI2PS xmm, mm L: [diff. reg. set] T: 0.25ns= 1.00c 943 SSE :CVTPS2PI mm, xmm L: [diff. reg. 
set] T: 0.25ns= 1.00c 944 SSE :CVTPS2PI + CVTPI2PS L: 2.99ns= 12.0c T: 0.25ns= 1.00c 945 SSE :CVTTPS2PI mm, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 946 SSE :CVTTPS2PI + CVTPI2PS L: 2.99ns= 12.0c T: 0.25ns= 1.00c 947 SSE :CVTSI2SS xmm, r32 L: [diff. reg. set] T: 0.27ns= 1.08c 948 SSE :CVTSS2SI r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 949 SSE :CVTSS2SI + CVTSI2SS r32 L: 6.48ns= 26.0c T: 0.17ns= 0.68c 950 SSE :CVTTSS2SI r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 951 SSE :CVTTSS2SI + CVTSI2SS r32 L: 6.48ns= 26.0c T: 0.17ns= 0.68c 952 AVX :VCVTSI2SS xmm, xmm, r32 L: [diff. reg. set] T: 0.27ns= 1.08c 953 AVX :VCVTSS2SI r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 954 AVX :VCVTSS2SI + VCVTSI2SS r32 L: 6.48ns= 26.0c T: 0.37ns= 1.50c 955 AVX :VCVTTSS2SI r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 956 AVX :VCVTTSS2SI + VCVTSI2SS r32 L: 6.48ns= 26.0c T: 0.37ns= 1.50c 967 SSE :STMXCSR [mem] L: [memory dep.] T: 4.48ns= 18.00c 968 SSE :LDMXCSR [mem] L: [memory dep.] T: 0.75ns= 3.00c 969 SSE :STMXCSR + LDMXCSR L: 6.98ns= 28.0c T: 6.98ns= 28.00c 970 SSE2 :MOVSD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 971 SSE2 :MOVSD xmm, [m64] L: [memory dep.] T: 0.12ns= 0.50c 972 SSE2 :MOVSD [m64], xmm L: [memory dep.] T: 0.25ns= 1.00c 973 SSE2 :MOVSD LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.62c 974 AVX :VMOVSD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 975 AVX :VMOVSD xmm, [m64] L: [memory dep.] T: 0.12ns= 0.50c 976 AVX :VMOVSD [m64], xmm L: [memory dep.] T: 0.25ns= 1.00c 977 AVX :VMOVSD LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.62c 978 SSE2 :MOVLPD xmm, [m64] L: [memory dep.] T: 0.12ns= 0.50c 979 SSE2 :MOVLPD [m64], xmm L: [memory dep.] T: 0.25ns= 1.00c 980 SSE2 :MOVLPD LS pair L: 3.24ns= 13.0c T: 0.25ns= 1.00c 981 AVX :VMOVLPD xmm, [m64] L: [memory dep.] T: 0.12ns= 0.50c 982 AVX :VMOVLPD [m64], xmm L: [memory dep.] T: 0.25ns= 1.00c 983 AVX :VMOVLPD LS pair L: 3.24ns= 13.0c T: 0.25ns= 1.00c 984 SSE2 :MOVHPD xmm, [m64] L: [memory dep.] 
T: 0.25ns= 1.00c 985 SSE2 :MOVHPD [m64], xmm L: [memory dep.] T: 0.25ns= 1.00c 986 SSE2 :MOVHPD LS pair L: 3.74ns= 15.0c T: 0.25ns= 1.00c 987 AVX :VMOVHPD xmm, [m64] L: [memory dep.] T: 0.25ns= 1.00c 988 AVX :VMOVHPD [m64], xmm L: [memory dep.] T: 0.25ns= 1.00c 989 AVX :VMOVHPD LS pair L: 3.74ns= 15.0c T: 0.25ns= 1.00c 990 SSE2 :MOVAPD xmm, xmm L: 0.06ns= 0.2c T: 0.06ns= 0.24c 991 SSE2 :MOVAPD xmm, [m128] L: [memory dep.] T: 0.12ns= 0.50c 992 SSE2 :MOVAPD [m128], xmm L: [memory dep.] T: 0.25ns= 1.00c 993 SSE2 :MOVAPD LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.63c 994 AVX :VMOVAPD xmm, xmm L: 0.06ns= 0.2c T: 0.06ns= 0.24c 995 AVX :VMOVAPD xmm, [m128] L: [memory dep.] T: 0.12ns= 0.50c 996 AVX :VMOVAPD [m128], xmm L: [memory dep.] T: 0.25ns= 1.00c 997 AVX :VMOVAPD LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.66c 998 SSE2 :MOVUPD xmm, xmm L: 0.06ns= 0.2c T: 0.06ns= 0.24c 999 SSE2 :MOVUPD xmm, [m128] L: [memory dep.] T: 0.12ns= 0.50c 1000 SSE2 :MOVUPD [m128], xmm L: [memory dep.] T: 0.25ns= 1.00c 1001 SSE2 :MOVUPD aligned LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.63c 1002 SSE2 :MOVUPD xmm, [m128 + 4] L: [memory dep.] T: 0.42ns= 1.67c 1003 SSE2 :MOVUPD [m128 + 4], xmm L: [memory dep.] T: 0.50ns= 2.00c 1004 SSE2 :MOVUPD unaligned LS pair L: 4.11ns= 16.5c T: 0.27ns= 1.08c 1005 AVX :VMOVUPD xmm, xmm L: 0.06ns= 0.2c T: 0.06ns= 0.24c 1006 AVX :VMOVUPD xmm, [m128] L: [memory dep.] T: 0.12ns= 0.50c 1007 AVX :VMOVUPD [m128], xmm L: [memory dep.] T: 0.25ns= 1.00c 1008 AVX :VMOVUPD aligned LS pair L: 2.74ns= 11.0c T: 0.15ns= 0.59c 1009 AVX :VMOVUPD xmm, [m128 + 4] L: [memory dep.] T: 0.69ns= 2.75c 1010 AVX :VMOVUPD [m128 + 4], xmm L: [memory dep.] T: 0.48ns= 1.92c 1011 AVX :VMOVUPD unaligned LS pair L: 4.11ns= 16.5c T: 0.31ns= 1.25c 1012 SSE4A :MOVNTSD [m64], xmm L: [memory dep.] T: 2.00ns= 2.00c 1013 SSE2 :MOVNTPD [m128], xmm L: [memory dep.] T: 2.00ns= 2.00c 1014 AVX :VMOVNTPD [m128], xmm L: [memory dep.] T: 2.00ns= 2.00c 1015 SSE2 :MOVMSKPD r32, xmm L: [diff. reg. 
set] T: 0.25ns= 1.00c 1016 AVX :VMOVMSKPD r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 1017 AVX :VMASKMOVPD xmm,xmm,[m128+4] L: [memory dep.] T: 0.73ns= 2.92c 1018 AVX :VMASKMOVPD [m128+4],xmm,xmm L: [memory dep.] T: 23.44ns= 94.08c 1019 AVX :VMASKMOVPD unaligned LSpair L: 28.40ns=114.0c T: 23.61ns= 94.75c 1020 SSE2 :UNPCKLPD xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1021 AVX :VUNPCKLPD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1022 SSE2 :UNPCKHPD xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1023 AVX :VUNPCKHPD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1024 SSE2 :SHUFPD xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1025 AVX :VSHUFPD xmm, xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1026 AVX :VPERMILPD xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1027 AVX :VPERMILPD xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1028 SSE2 :COMISD xmm, xmm L: [no true dep.] T: 0.25ns= 1.00c 1029 AVX :VCOMISD xmm, xmm L: [no true dep.] T: 0.25ns= 1.00c 1030 SSE2 :UCOMISD xmm, xmm L: [no true dep.] T: 0.25ns= 1.00c 1031 AVX :VUCOMISD xmm, xmm L: [no true dep.] 
T: 0.25ns= 1.00c 1032 SSE2 :CMPSD xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1033 SSE2 :CMPPD xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1034 AVX :VCMPSD xmm, xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1035 AVX :VCMPPD xmm, xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1036 SSE2 :SUBSD xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1037 AVX :VSUBSD xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1038 SSE2 :SUBPD xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1039 AVX :VSUBPD xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1040 SSE2 :ADDSD xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1041 AVX :VADDSD xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1042 SSE2 :ADDPD xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1043 AVX :VADDPD xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1044 SSE2 :MULSD xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1045 AVX :VMULSD xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1046 SSE2 :MULPD xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1047 AVX :VMULPD xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1048 SSE2 :MULSD+ADDSD xmm, xmm L: 2.49ns= 10.0c T: [not enough reg] 1049 AVX :VMULSD+VADDSD xmm, xmm, xmm L: 2.49ns= 10.0c T: [not enough reg] 1050 SSE2 :MULPD+ADDPD xmm, xmm L: 2.49ns= 10.0c T: [not enough reg] 1051 AVX :VMULPD+VADDPD xmm, xmm, xmm L: 2.49ns= 10.0c T: [not enough reg] 1052 SSE2 :MULSD xm1,xm1 ADDSD xm2,xm2 L: 1.25ns= 5.0c T: [not enough reg] 1053 AVX :VMULSD xmm1.. VADDSD xmm2.. L: 1.25ns= 5.0c T: [not enough reg] 1054 SSE2 :MULPD xm1,xm1 ADDPD xm2,xm2 L: 1.25ns= 5.0c T: [not enough reg] 1055 AVX :VMULPD xmm1.. VADDPD xmm2.. 
L: 1.25ns= 5.0c T: [not enough reg] 1056 SSE2 :MAXSD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1057 AVX :VMAXSD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1058 SSE2 :MAXPD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1059 AVX :VMAXPD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1060 SSE2 :MINSD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1061 AVX :VMINSD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1062 SSE2 :MINPD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1063 AVX :VMINPD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1064 SSE2 :ANDNPD xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1065 SSE2 :ANDNPD xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1066 AVX :VANDNPD xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1067 AVX :VANDNPD xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1068 SSE2 :ANDPD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1069 SSE2 :ANDPD xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1070 AVX :VANDPD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1071 AVX :VANDPD xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1072 SSE2 :ORPD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1073 SSE2 :ORPD xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1074 AVX :VORPD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1075 AVX :VORPD xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1076 SSE2 :XORPD xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1077 SSE2 :XORPD xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1078 AVX :VXORPD xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1079 AVX :VXORPD xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1080 SSE2 :DIVSD xmm, xmm L: 6.73ns= 27.0c T: 2.24ns= 9.00c 1081 SSE2 :DIVSD (0.0/x) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 1082 SSE2 :DIVSD (x/1.0) L: 2.24ns= 9.0c T: 1.10ns= 4.42c 1083 SSE2 :DIVSD (x/2.0) L: 1.97ns= 7.9c T: 1.12ns= 4.50c 1084 SSE2 :DIVSD (x/0.5) L: 1.97ns= 7.9c T: 1.12ns= 4.50c 1085 AVX :VDIVSD xmm, xmm, xmm L: 6.73ns= 27.0c T: 2.24ns= 9.00c 1086 AVX :VDIVSD (0.0/x) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 1087 AVX :VDIVSD (x/1.0) L: 2.24ns= 9.0c T: 1.12ns= 
4.50c 1088 AVX :VDIVSD (x/2.0) L: 2.22ns= 8.9c T: 1.12ns= 4.50c 1089 AVX :VDIVSD (x/0.5) L: 2.22ns= 8.9c T: 1.12ns= 4.50c 1090 SSE2 :DIVPD xmm, xmm L: 6.73ns= 27.0c T: 2.24ns= 9.00c 1091 SSE2 :DIVPD (0.0/x) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 1092 SSE2 :DIVPD (x/1.0) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 1093 SSE2 :DIVPD (x/2.0) L: 1.79ns= 7.2c T: 1.12ns= 4.50c 1094 SSE2 :DIVPD (x/0.5) L: 1.79ns= 7.2c T: 1.12ns= 4.50c 1095 AVX :VDIVPD xmm, xmm, xmm L: 6.73ns= 27.0c T: 2.24ns= 9.00c 1096 AVX :VDIVPD (0.0/x) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 1097 AVX :VDIVPD (x/1.0) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 1098 AVX :VDIVPD (x/2.0) L: 1.81ns= 7.3c T: 1.12ns= 4.50c 1099 AVX :VDIVPD (x/0.5) L: 1.81ns= 7.3c T: 1.12ns= 4.50c 1100 SSE2 :SQRTSD xmm, xmm L: 9.47ns= 38.0c T: 3.61ns= 14.50c 1101 SSE2 :SQRTSD (0.0) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 1102 SSE2 :SQRTSD (1.0) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 1103 AVX :VSQRTSD xmm, xmm L: 9.47ns= 38.0c T: 4.44ns= 17.83c 1104 AVX :VSQRTSD (0.0) L: 2.24ns= 9.0c T: 1.33ns= 5.33c 1105 AVX :VSQRTSD (1.0) L: 2.24ns= 9.0c T: 1.33ns= 5.33c 1106 SSE2 :SQRTPD xmm, xmm L: 9.47ns= 38.0c T: 3.61ns= 14.50c 1107 SSE2 :SQRTPD (0.0) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 1108 SSE2 :SQRTPD (1.0) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 1109 AVX :VSQRTPD xmm, xmm L: 9.47ns= 38.0c T: 3.61ns= 14.50c 1110 AVX :VSQRTPD (0.0) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 1111 AVX :VSQRTPD (1.0) L: 2.24ns= 9.0c T: 1.12ns= 4.50c 1112 SSE2 :CVTPI2PD xmm, mm L: [diff. reg. set] T: 0.25ns= 1.00c 1113 SSE2 :CVTPD2PI mm, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 1114 SSE2 :CVTPD2PI + CVTPI2PD L: 3.49ns= 14.0c T: 0.25ns= 1.00c 1115 SSE2 :CVTTPD2PI mm, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 1116 SSE2 :CVTTPD2PI + CVTPI2PD L: 3.49ns= 14.0c T: 0.25ns= 1.00c 1117 SSE2 :CVTSI2SD xmm, r32 L: [diff. reg. set] T: 0.27ns= 1.08c 1118 SSE2 :CVTSD2SI r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 1119 SSE2 :CVTSD2SI + CVTSI2SD r32 L: 6.48ns= 26.0c T: 0.17ns= 0.68c 1120 SSE2 :CVTTSD2SI r32, xmm L: [diff. reg. 
set] T: 0.25ns= 1.00c 1121 SSE2 :CVTTSD2SI + CVTSI2SD r32 L: 6.48ns= 26.0c T: 0.17ns= 0.68c 1122 AVX :VCVTSI2SD xmm, xmm, r32 L: [diff. reg. set] T: 0.27ns= 1.08c 1123 AVX :VCVTSD2SI r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 1124 AVX :VCVTSD2SI + VCVTSI2SD r32 L: 6.48ns= 26.0c T: 0.37ns= 1.50c 1125 AVX :VCVTTSD2SI r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 1126 AVX :VCVTTSD2SI + VCVTSI2SD r32 L: 6.48ns= 26.0c T: 0.37ns= 1.50c 1137 SSE2 :CVTDQ2PD xmm, xmm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1138 SSE2 :CVTPD2DQ xmm, xmm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1139 SSE2 :CVTPD2DQ + CVTDQ2PD L: 3.49ns= 14.0c T: 0.25ns= 1.00c 1140 SSE2 :CVTTPD2DQ xmm, xmm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1141 SSE2 :CVTTPD2DQ + CVTDQ2PD L: 3.49ns= 14.0c T: 0.25ns= 1.00c 1142 AVX :VCVTDQ2PD xmm, xmm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1143 AVX :VCVTPD2DQ xmm, xmm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1144 AVX :VCVTPD2DQ + VCVTDQ2PD L: 3.49ns= 14.0c T: 0.25ns= 1.00c 1145 AVX :VCVTTPD2DQ xmm, xmm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1146 AVX :VCVTTPD2DQ + VCVTDQ2PD L: 3.49ns= 14.0c T: 0.25ns= 1.00c 1147 SSE2 :CVTDQ2PS xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1148 SSE2 :CVTPS2DQ xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1149 SSE2 :CVTPS2DQ + CVTDQ2PS L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1150 SSE2 :CVTTPS2DQ xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1151 SSE2 :CVTTPS2DQ + CVTDQ2PS L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1152 AVX :VCVTDQ2PS xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1153 AVX :VCVTPS2DQ xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1154 AVX :VCVTPS2DQ + VCVTDQ2PS L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1155 AVX :VCVTTPS2DQ xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1156 AVX :VCVTTPS2DQ + VCVTDQ2PS L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1157 SSE2 :CVTPS2PD xmm, xmm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1158 SSE2 :CVTPD2PS xmm, xmm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1159 SSE2 :CVTPD2PS + CVTPS2PD L: 3.49ns= 14.0c T: 0.25ns= 1.00c 1160 SSE2 :CVTSS2SD xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1161 SSE2 :CVTSD2SS xmm, xmm L: 1.00ns= 
4.0c T: 0.25ns= 1.00c 1162 SSE2 :CVTSD2SS + CVTSS2SD L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1163 AVX :VCVTPS2PD xmm, xmm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1164 AVX :VCVTPD2PS xmm, xmm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1165 AVX :VCVTPD2PS + VCVTPS2PD L: 3.49ns= 14.0c T: 0.25ns= 1.00c 1166 AVX :VCVTSS2SD xmm, xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1167 AVX :VCVTSD2SS xmm, xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1168 AVX :VCVTSD2SS + VCVTSS2SD L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1169 SSE2 :MOVD r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 1170 SSE2 :MOVD xmm, r32 L: [diff. reg. set] T: 0.25ns= 1.00c 1171 SSE2 :MOVD r32, xmm+MOVD xmm, r32 L: 4.48ns= 18.0c T: 0.09ns= 0.35c 1172 AVX :VMOVD r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 1173 AVX :VMOVD xmm, r32 L: [diff. reg. set] T: 0.25ns= 1.00c 1174 AVX :VMOVD r32,xmm+VMOVD xmm,r32 L: 4.48ns= 18.0c T: 0.25ns= 1.00c 1181 SSE2 :MOVD xmm, [m32] L: [memory dep.] T: 0.12ns= 0.50c 1182 SSE2 :MOVD [m32], xmm L: [memory dep.] T: 0.25ns= 1.00c 1183 SSE2 :MOVD LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.63c 1184 AVX :VMOVD xmm, [m32] L: [memory dep.] T: 0.12ns= 0.50c 1185 AVX :VMOVD [m32], xmm L: [memory dep.] T: 0.25ns= 1.00c 1186 AVX :VMOVD LS pair L: 2.74ns= 11.0c T: 0.20ns= 0.80c 1187 SSE2 :MOVQ xmm, [m64] L: [memory dep.] T: 0.12ns= 0.50c 1188 SSE2 :MOVQ [m64], xmm L: [memory dep.] T: 0.25ns= 1.00c 1189 SSE2 :MOVQ LS pair L: 2.74ns= 11.0c T: 0.15ns= 0.61c 1190 AVX :VMOVQ xmm, [m64] L: [memory dep.] T: 0.12ns= 0.50c 1191 AVX :VMOVQ [m64], xmm L: [memory dep.] T: 0.25ns= 1.00c 1192 AVX :VMOVQ LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.62c 1193 SSE2 :MOVDQ2Q mm, xmm L: [diff. reg. set] T: 0.12ns= 0.50c 1194 SSE2 :MOVQ2DQ xmm, mm L: [diff. reg. set] T: 0.12ns= 0.50c 1195 SSE2 :MOVDQ2Q + MOVQ2DQ xmm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1196 SSE2 :MOVDQA xmm, xmm L: 0.06ns= 0.2c T: 0.06ns= 0.24c 1197 SSE2 :MOVDQA xmm, [m128] L: [memory dep.] T: 0.12ns= 0.50c 1198 SSE2 :MOVDQA [m128], xmm L: [memory dep.] 
T: 0.25ns= 1.00c 1199 SSE2 :MOVDQA LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.63c 1200 AVX :VMOVDQA xmm, xmm L: 0.06ns= 0.2c T: 0.06ns= 0.24c 1201 AVX :VMOVDQA xmm, [m128] L: [memory dep.] T: 0.12ns= 0.50c 1202 AVX :VMOVDQA [m128], xmm L: [memory dep.] T: 0.25ns= 1.00c 1203 AVX :VMOVDQA LS pair L: 0.25ns= 1.0c T: 0.25ns= 1.00c 1204 SSE2 :MOVDQU xmm, xmm L: 0.06ns= 0.2c T: 0.06ns= 0.24c 1205 SSE2 :MOVDQU xmm, [m128] L: [memory dep.] T: 0.12ns= 0.50c 1206 SSE2 :MOVDQU [m128], xmm L: [memory dep.] T: 0.25ns= 1.00c 1207 SSE2 :MOVDQU aligned LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.63c 1208 SSE2 :MOVDQU xmm, [m128 + 4] L: [memory dep.] T: 0.25ns= 1.00c 1209 SSE2 :MOVDQU [m128 + 4], xmm L: [memory dep.] T: 0.50ns= 2.00c 1210 SSE2 :MOVDQU unaligned LS pair L: 4.11ns= 16.5c T: 0.33ns= 1.33c 1211 AVX :VMOVDQU xmm, xmm L: 0.06ns= 0.2c T: 0.06ns= 0.25c 1212 AVX :VMOVDQU xmm, [m128] L: [memory dep.] T: 0.12ns= 0.50c 1213 AVX :VMOVDQU [m128], xmm L: [memory dep.] T: 0.25ns= 1.00c 1214 AVX :VMOVDQU aligned LS pair L: 2.74ns= 11.0c T: 0.16ns= 0.63c 1215 AVX :VMOVDQU xmm, [m128 + 4] L: [memory dep.] T: 0.66ns= 2.67c 1216 AVX :VMOVDQU [m128 + 4], xmm L: [memory dep.] T: 0.50ns= 2.00c 1217 AVX :VMOVDQU unaligned LS pair L: 4.11ns= 16.5c T: 0.33ns= 1.33c 1218 SSE4.1:MOVNTDQA xmm, [m128] L: [memory dep.] T: 0.50ns= 0.50c 1219 SSE2 :MOVNTDQ [m128], xmm L: [memory dep.] T: 2.00ns= 2.00c 1220 SSE4.1:MOVNTDQA + MOVNTDQ L: 2.74ns= 11.0c T: 11.00ns= 11.00c 1221 AVX :VMOVNTDQA xmm, [m128] L: [memory dep.] T: 0.50ns= 0.50c 1222 AVX :VMOVNTDQ [m128], xmm L: [memory dep.] T: 2.00ns= 2.00c 1223 AVX :VMOVNTDQA + VMOVNTDQ L: 2.74ns= 11.0c T: 11.00ns= 11.00c 1224 SSE2 :PMOVMSKB r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 1226 AVX :VPMOVMSKB r32, xmm L: [diff. reg. set] T: 0.25ns= 1.00c 1228 SSE2 :MASKMOVDQU xmm, xmm L: [memory dep.] T: 102.00ns=102.00c 1229 AVX :VMASKMOVDQU xmm, xmm L: [memory dep.] 
T: 88.50ns= 88.50c 1230 SSE2 :PADDB xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1231 AVX :VPADDB xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1232 SSE2 :PADDW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1233 AVX :VPADDW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1234 SSE2 :PADDD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1235 AVX :VPADDD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1236 SSE2 :PADDQ xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1237 AVX :VPADDQ xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1238 SSE2 :PADDSB xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1239 AVX :VPADDSB xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1240 SSE2 :PADDSW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1241 AVX :VPADDSW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1242 SSE2 :PADDUSB xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1243 AVX :VPADDUSB xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1244 SSE2 :PADDUSW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1245 AVX :VPADDUSW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1246 SSE2 :PSUBB xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1247 SSE2 :PSUBB xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1248 AVX :VPSUBB xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1249 AVX :VPSUBB xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1250 SSE2 :PSUBW xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1251 SSE2 :PSUBW xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1252 AVX :VPSUBW xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1253 AVX :VPSUBW xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1254 SSE2 :PSUBD xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1255 SSE2 :PSUBD xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1256 AVX :VPSUBD xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1257 AVX :VPSUBD xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1258 SSE2 :PSUBQ xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1259 SSE2 :PSUBQ xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1260 AVX :VPSUBQ xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1261 AVX :VPSUBQ xmm1, xmm1, xmm2 L: 0.50ns= 
2.0c T: 0.12ns= 0.50c 1262 SSE2 :PSUBSB xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1263 SSE2 :PSUBSB xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1264 AVX :VPSUBSB xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1265 AVX :VPSUBSB xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1266 SSE2 :PSUBSW xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1267 SSE2 :PSUBSW xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1268 AVX :VPSUBSW xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1269 AVX :VPSUBSW xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1270 SSE2 :PSUBUSB xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1271 SSE2 :PSUBUSB xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1272 AVX :VPSUBUSB xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1273 AVX :VPSUBUSB xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1274 SSE2 :PSUBUSW xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1275 SSE2 :PSUBUSW xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1276 AVX :VPSUBUSW xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1277 AVX :VPSUBUSW xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1278 SSE2 :PCMPEQB xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1279 SSE2 :PCMPEQB xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1280 AVX :VPCMPEQB xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1281 AVX :VPCMPEQB xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1282 SSE2 :PCMPEQW xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1283 SSE2 :PCMPEQW xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1284 AVX :VPCMPEQW xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1285 AVX :VPCMPEQW xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1286 SSE2 :PCMPEQD xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1287 SSE2 :PCMPEQD xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1288 AVX :VPCMPEQD xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1289 AVX :VPCMPEQD xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1290 SSE4.1:PCMPEQQ xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1291 SSE4.1:PCMPEQQ xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1292 AVX :VPCMPEQQ xmm, xmm, xmm L: 0.50ns= 
2.0c T: 0.12ns= 0.50c 1293 AVX :VPCMPEQQ xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1294 SSE2 :PCMPGTB xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1295 SSE2 :PCMPGTB xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1296 AVX :VPCMPGTB xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1297 AVX :VPCMPGTB xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1298 SSE2 :PCMPGTW xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1299 SSE2 :PCMPGTW xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1300 AVX :VPCMPGTW xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1301 AVX :VPCMPGTW xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1302 SSE2 :PCMPGTD xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1303 SSE2 :PCMPGTD xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1304 AVX :VPCMPGTD xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1305 AVX :VPCMPGTD xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1306 SSE4.2:PCMPGTQ xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1307 SSE4.2:PCMPGTQ xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1308 AVX :VPCMPGTQ xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1309 AVX :VPCMPGTQ xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1310 SSE2 :PAND xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1311 SSE2 :PAND xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1312 AVX :VPAND xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1313 AVX :VPAND xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1314 SSE2 :PANDN xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1315 SSE2 :PANDN xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1316 AVX :VPANDN xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1317 AVX :VPANDN xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1318 SSE2 :POR xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1319 SSE2 :POR xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1320 AVX :VPOR xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1321 AVX :VPOR xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1322 SSE2 :PXOR xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1323 SSE2 :PXOR xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1324 AVX 
:VPXOR xmm, xmm, xmm L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1325 AVX :VPXOR xmm1, xmm1, xmm2 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1326 SSE2 :PMULHW xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1327 AVX :VPMULHW xmm, xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1328 SSE2 :PMULHUW xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1329 AVX :VPMULHUW xmm, xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1330 SSSE3 :PMULHRSW xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1331 AVX :VPMULHRSW xmm, xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1332 SSE2 :PMULLW xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1333 AVX :VPMULLW xmm, xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1334 SSE4.1:PMULLD xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1335 AVX :VPMULLD xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1336 SSE4.1:PMULDQ xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1337 AVX :VPMULDQ xmm, xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1338 SSE2 :PMULUDQ xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1339 AVX :VPMULUDQ xmm, xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1340 SSSE3 :PMADDUBSW xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1341 AVX :VPMADDUBSW xmm, xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1342 SSE2 :PMADDWD xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1343 AVX :VPMADDWD xmm, xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1344 SSE2 :PSLLW xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1345 AVX :VPSLLW xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1346 SSE2 :PSLLW xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1347 AVX :VPSLLW xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1348 SSE2 :PSLLD xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1349 AVX :VPSLLD xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1350 SSE2 :PSLLD xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1351 AVX :VPSLLD xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1352 SSE2 :PSLLQ xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1353 AVX :VPSLLQ xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1354 SSE2 :PSLLQ xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1355 AVX :VPSLLQ xmm, xmm, imm8 L: 0.50ns= 2.0c T: 
0.25ns= 1.00c 1356 SSE2 :PSLLDQ xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1357 AVX :VPSLLDQ xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1358 SSE2 :PSRAW xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1359 AVX :VPSRAW xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1360 SSE2 :PSRAW xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1361 AVX :VPSRAW xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1362 SSE2 :PSRAD xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1363 AVX :VPSRAD xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1364 SSE2 :PSRAD xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1365 AVX :VPSRAD xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1366 SSE2 :PSRLW xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1367 AVX :VPSRLW xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1368 SSE2 :PSRLW xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1369 AVX :VPSRLW xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1370 SSE2 :PSRLD xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1371 AVX :VPSRLD xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1372 SSE2 :PSRLD xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1373 AVX :VPSRLD xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1374 SSE2 :PSRLQ xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1375 AVX :VPSRLQ xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1376 SSE2 :PSRLQ xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1377 AVX :VPSRLQ xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1378 SSE2 :PSRLDQ xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1379 AVX :VPSRLDQ xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1380 SSE2 :PUNPCKHBW xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1381 AVX :VPUNPCKHBW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1382 SSE2 :PUNPCKHWD xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1383 AVX :VPUNPCKHWD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1384 SSE2 :PUNPCKHDQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1385 AVX :VPUNPCKHDQ xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1386 SSE2 :PUNPCKHQDQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1387 AVX :VPUNPCKHQDQ xmm, xmm, 
xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1388 SSE2 :PUNPCKLBW xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1389 AVX :VPUNPCKLBW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1390 SSE2 :PUNPCKLWD xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1391 AVX :VPUNPCKLWD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1392 SSE2 :PUNPCKLDQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1393 AVX :VPUNPCKLDQ xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1394 SSE2 :PUNPCKLQDQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1395 AVX :VPUNPCKLQDQ xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1396 SSE2 :PACKSSWB xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1397 AVX :VPACKSSWB xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1398 SSE2 :PACKUSWB xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1399 AVX :VPACKUSWB xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1400 SSE2 :PACKSSDW xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1401 AVX :VPACKSSDW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1402 SSE4.1:PACKUSDW xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1403 AVX :VPACKUSDW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1404 SSE2 :PAVGB xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1405 AVX :VPAVGB xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1406 SSE2 :PAVGW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1407 AVX :VPAVGW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1408 SSE4.1:PEXTRB r32, xmm, im8 L: [diff. reg. set] T: 0.25ns= 1.00c 1409 SSE4.1:PINSRB xmm, r32, im8 L: [diff. reg. set] T: 0.29ns= 1.17c 1410 SSE4.1:PEXTRB + PINSRB r32 L: 5.48ns= 22.0c T: 0.31ns= 1.25c 1411 AVX :VPEXTRB r32, xmm, im8 L: [diff. reg. set] T: 0.25ns= 1.00c 1412 AVX :VPINSRB xmm, r32, im8 L: [diff. reg. set] T: 0.27ns= 1.08c 1413 AVX :VPEXTRB + VPINSRB r32 L: 5.48ns= 22.0c T: 0.37ns= 1.50c 1418 SSE2 :PEXTRW r32, xmm, im8 L: [diff. reg. set] T: 0.25ns= 1.00c 1419 SSE2 :PINSRW xmm, r32, im8 L: [diff. reg. set] T: 0.29ns= 1.17c 1420 SSE2 :PEXTRW + PINSRW r32 L: 5.48ns= 22.0c T: 0.31ns= 1.25c 1421 AVX :VPEXTRW r32, xmm, im8 L: [diff. reg. 
set] T: 0.25ns= 1.00c 1422 AVX :VPINSRW xmm, r32, im8 L: [diff. reg. set] T: 0.29ns= 1.17c 1423 AVX :VPEXTRW + VPINSRW r32 L: 5.48ns= 22.0c T: 0.33ns= 1.33c 1428 SSE4.1:PEXTRD r32, xmm, im8 L: [diff. reg. set] T: 0.25ns= 1.00c 1429 SSE4.1:PINSRD xmm, r32, im8 L: [diff. reg. set] T: 0.29ns= 1.17c 1430 SSE4.1:PEXTRD + PINSRD r32 L: 5.48ns= 22.0c T: 0.31ns= 1.25c 1431 AVX :VPEXTRD r32, xmm, im8 L: [diff. reg. set] T: 0.25ns= 1.00c 1432 AVX :VPINSRD xmm, r32, im8 L: [diff. reg. set] T: 0.27ns= 1.08c 1433 AVX :VPEXTRD + VPINSRD r32 L: 5.48ns= 22.0c T: 0.39ns= 1.58c 1440 SSE4.1:EXTRACTPS r32, xmm, im8 L: [diff. reg. set] T: 0.25ns= 1.00c 1441 AVX :VEXTRACTPS r32, xmm, im8 L: [diff. reg. set] T: 0.25ns= 1.00c 1444 SSE4.1:INSERTPS xmm, xmm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1445 AVX :VINSERTPS xmm, xmm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1446 SSE4A :EXTRQ xmm, im8, im8 L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1447 SSE4A :EXTRQ xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1448 SSE4A :INSERTQ xmm, xmm, im8, im8 L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1449 SSE4A :INSERTQ xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1450 SSE2 :PMAXUB xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1451 AVX :VPMAXUB xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1452 SSE4.1:PMAXSB xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1453 AVX :VPMAXSB xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1454 SSE4.1:PMAXUW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1455 AVX :VPMAXUW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1456 SSE2 :PMAXSW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1457 AVX :VPMAXSW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1458 SSE4.1:PMAXUD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1459 AVX :VPMAXUD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1460 SSE4.1:PMAXSD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1461 AVX :VPMAXSD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1462 SSE2 :PMINUB xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1463 AVX :VPMINUB xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1464 
SSE4.1:PMINSB xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1465 AVX :VPMINSB xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1466 SSE4.1:PMINUW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1467 AVX :VPMINUW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1468 SSE2 :PMINSW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1469 AVX :VPMINSW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1470 SSE4.1:PMINUD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1471 AVX :VPMINUD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1472 SSE4.1:PMINSD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1473 AVX :VPMINSD xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1474 SSE2 :PSADBW xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1475 AVX :VPSADBW xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1476 SSSE3 :PSHUFB xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1477 AVX :VPSHUFB xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1478 SSE2 :PSHUFLW xmm, xmm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1479 AVX :VPSHUFLW xmm, xmm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1480 SSE2 :PSHUFHW xmm, xmm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1481 AVX :VPSHUFHW xmm, xmm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1482 SSE2 :PSHUFD xmm, xmm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1483 AVX :VPSHUFD xmm, xmm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1484 SSE3 :ADDSUBPS xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1485 AVX :VADDSUBPS xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1486 SSE3 :ADDSUBPD xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1487 AVX :VADDSUBPD xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1488 SSE3 :HADDPS xmm, xmm L: 2.74ns= 11.0c T: 0.50ns= 2.00c 1489 AVX :VHADDPS xmm, xmm, xmm L: 2.74ns= 11.0c T: 0.50ns= 2.00c 1490 SSE3 :HADDPD xmm, xmm L: 2.74ns= 11.0c T: 0.50ns= 2.00c 1491 AVX :VHADDPD xmm, xmm, xmm L: 2.74ns= 11.0c T: 0.50ns= 2.00c 1492 SSE3 :HSUBPS xmm, xmm L: 2.74ns= 11.0c T: 0.50ns= 2.00c 1493 AVX :VHSUBPS xmm, xmm, xmm L: 2.74ns= 11.0c T: 0.50ns= 2.00c 1494 SSE3 :HSUBPD xmm, xmm L: 2.74ns= 11.0c T: 0.50ns= 2.00c 1495 AVX :VHSUBPD xmm, xmm, xmm L: 
2.74ns= 11.0c T: 0.50ns= 2.00c 1496 SSE3 :MOVSLDUP xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1497 AVX :VMOVSLDUP xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1498 SSE3 :MOVSHDUP xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1499 AVX :VMOVSHDUP xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1500 SSE3 :MOVDDUP xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1501 AVX :VMOVDDUP xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1502 SSE3 :LDDQU xmm, [m128 + 4] L: [memory dep.] T: 0.50ns= 2.00c 1503 AVX :VLDDQU xmm, [m128 + 4] L: [memory dep.] T: 0.69ns= 2.75c 1504 SSSE3 :PABSB xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1505 AVX :VPABSB xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1506 SSSE3 :PABSW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1507 AVX :VPABSW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1508 SSSE3 :PABSD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1509 AVX :VPABSD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1510 SSSE3 :PALIGNR xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1511 AVX :VPALIGNR xmm, xmm, xmm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1512 SSSE3 :PHADDD xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1513 AVX :VPHADDD xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1514 SSSE3 :PHADDW xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1515 AVX :VPHADDW xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1516 SSSE3 :PHADDSW xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1517 AVX :VPHADDSW xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1518 SSSE3 :PHSUBD xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1519 AVX :VPHSUBD xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1520 SSSE3 :PHSUBW xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1521 AVX :VPHSUBW xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1522 SSSE3 :PHSUBSW xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1523 AVX :VPHSUBSW xmm, xmm, xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1524 SSSE3 :PSIGNB xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1525 AVX :VPSIGNB xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1526 SSSE3 :PSIGNW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1527 
AVX :VPSIGNW xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1528 SSSE3 :PSIGND xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1529 AVX :VPSIGND xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1530 SSE4.1:BLENDPS xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1531 AVX :VBLENDPS xmm, xmm, xmm, im8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1532 SSE4.1:BLENDVPS xmm, xmm, L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1533 AVX :VBLENDVPS xmm, xmm, xmm, xm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1534 SSE4.1:BLENDPD xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1535 AVX :VBLENDPD xmm, xmm, xmm, im8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1536 SSE4.1:BLENDVPD xmm, xmm, L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1537 AVX :VBLENDVPD xmm, xmm, xmm, xm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1538 SSE4.1:PBLENDW xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1539 AVX :VPBLENDW xmm, xmm, xmm, im8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1540 SSE4.1:PBLENDVB xmm, xmm, L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1541 AVX :VPBLENDVB xmm, xmm, xmm, xm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1542 SSE4.1:DPPS xmm, xmm, imm8 L: 6.23ns= 25.0c T: 1.31ns= 5.25c 1543 AVX :VDPPS xmm, xmm, xmm, imm8 L: 7.47ns= 30.0c T: 1.62ns= 6.50c 1544 SSE4.1:DPPD xmm, xmm, imm8 L: 3.74ns= 15.0c T: 1.22ns= 4.92c 1545 AVX :VDPPD xmm, xmm, xmm, imm8 L: 3.74ns= 15.0c T: 1.22ns= 4.92c 1546 SSE4.1:MPSADBW xmm, xmm, imm8 L: 2.49ns= 10.0c T: 1.00ns= 4.00c 1547 AVX :VMPSADBW xmm, xmm, imm8 L: 2.49ns= 10.0c T: 1.00ns= 4.00c 1548 SSE4.1:PHMINPOSUW xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1549 AVX :VPHMINPOSUW xmm, xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1550 SSE4.1:PMOVSXBW xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1551 AVX :VPMOVSXBW xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1552 SSE4.1:PMOVSXBD xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1553 AVX :VPMOVSXBD xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1554 SSE4.1:PMOVSXBQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1555 AVX :VPMOVSXBQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1556 SSE4.1:PMOVSXWD xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1557 AVX 
:VPMOVSXWD xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1558 SSE4.1:PMOVSXWQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1559 AVX :VPMOVSXWQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1560 SSE4.1:PMOVSXDQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1561 AVX :VPMOVSXDQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1562 SSE4.1:PMOVZXBW xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1563 AVX :VPMOVZXBW xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1564 SSE4.1:PMOVZXBD xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1565 AVX :VPMOVZXBD xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1566 SSE4.1:PMOVZXBQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1567 AVX :VPMOVZXBQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1568 SSE4.1:PMOVZXWD xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1569 AVX :VPMOVZXWD xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1570 SSE4.1:PMOVZXWQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1571 AVX :VPMOVZXWQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1572 SSE4.1:PMOVZXDQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1573 AVX :VPMOVZXDQ xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1574 SSE4.1:PTEST xmm, xmm L: [no true dep.] T: 0.25ns= 1.00c 1575 AVX :VPTEST xmm, xmm L: [no true dep.] T: 0.25ns= 1.00c 1576 AVX :VPTESTPS xmm, xmm L: [no true dep.] T: 0.25ns= 1.00c 1577 AVX :VPTESTPD xmm, xmm L: [no true dep.] T: 0.25ns= 1.00c 1578 SSE4.1:ROUNDSS xmm, xmm, imm8 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1579 AVX :VROUNDSS xmm, xmm, xmm, im8 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1580 SSE4.1:ROUNDPS xmm, xmm, imm8 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1581 AVX :VROUNDPS xmm, xmm, imm8 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1582 SSE4.1:ROUNDSD xmm, xmm, imm8 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1583 AVX :VROUNDSD xmm, xmm, xmm, im8 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1584 SSE4.1:ROUNDPD xmm, xmm, imm8 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1585 AVX :VROUNDPD xmm, xmm, imm8 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1586 AVX :VBROADCASTSS xmm, m32 L: [memory dep.] 
T: 0.25ns= 1.00c 1587 SSE4.2:PCMPESTRI xmm, xmm, imm8 L: 1.99ns= 8.0c T: 1.99ns= 8.00c 1588 AVX :VPCMPESTRI xmm, xmm, imm8 L: 1.99ns= 8.0c T: 1.99ns= 8.00c 1589 SSE4.2:PCMPESTRM xmm, xmm, imm8 L: 1.99ns= 8.0c T: 1.99ns= 8.00c 1590 AVX :VPCMPESTRM xmm, xmm, imm8 L: 1.99ns= 8.0c T: 1.99ns= 8.00c 1591 SSE4.2:PCMPISTRI xmm, xmm, imm8 L: 0.75ns= 3.0c T: 0.75ns= 3.00c 1592 AVX :VPCMPISTRI xmm, xmm, imm8 L: 0.75ns= 3.0c T: 0.75ns= 3.00c 1593 SSE4.2:PCMPISTRM xmm, xmm, imm8 L: 1.00ns= 4.0c T: 1.00ns= 4.00c 1594 AVX :VPCMPISTRM xmm, xmm, imm8 L: 1.00ns= 4.0c T: 1.00ns= 4.00c 1595 CLMUL :PCLMULQDQ xmm, xmm, imm8 L: 3.24ns= 13.0c T: 1.66ns= 6.67c 1596 AVX :VPCLMULQDQ xmm,xmm,xmm,im8 L: 3.24ns= 13.0c T: 1.66ns= 6.67c 1597 AESNI :AESENC xmm, xmm L: 2.24ns= 9.0c T: 0.25ns= 1.00c 1598 AVX :VAESENC xmm, xmm, xmm L: 2.24ns= 9.0c T: 0.25ns= 1.00c 1599 AESNI :AESENCLAST xmm, xmm L: 2.24ns= 9.0c T: 0.25ns= 1.00c 1600 AVX :VAESENCLAST xmm, xmm, xmm L: 2.24ns= 9.0c T: 0.25ns= 1.00c 1601 AESNI :AESDEC xmm, xmm L: 2.24ns= 9.0c T: 0.25ns= 1.00c 1602 AVX :VAESDEC xmm, xmm, xmm L: 2.24ns= 9.0c T: 0.25ns= 1.00c 1603 AESNI :AESDECLAST xmm, xmm L: 2.24ns= 9.0c T: 0.25ns= 1.00c 1604 AVX :VAESDECLAST xmm, xmm, xmm L: 2.24ns= 9.0c T: 0.25ns= 1.00c 1605 AESNI :AESIMC xmm, xmm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 1606 AVX :VAESIMC xmm, xmm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 1607 AESNI :AESKEYGEN xmm, xmm, imm8 L: 1.25ns= 5.0c T: 0.25ns= 1.00c 1608 AVX :VAESKEYGEN xmm, xmm, imm8 L: 1.25ns= 5.0c T: 0.25ns= 1.00c 1609 FMA4 :VFMADDSS xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1610 FMA3 :VFMADD132SS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1611 FMA3 :VFMADD213SS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1612 FMA3 :VFMADD231SS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1613 FMA4 :VFMADDPS xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1614 FMA3 :VFMADD132PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1615 FMA3 :VFMADD213PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1616 FMA3 :VFMADD231PS 
xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1617 FMA4 :VFMSUBSS xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1618 FMA3 :VFMSUB132SS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1619 FMA3 :VFMSUB213SS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1620 FMA3 :VFMSUB231SS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1621 FMA4 :VFMSUBPS xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1622 FMA3 :VFMSUB132PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1623 FMA3 :VFMSUB213PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1624 FMA3 :VFMSUB231PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1625 FMA4 :VFNMADDSS xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1626 FMA3 :VFNMADD132SS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1627 FMA3 :VFNMADD213SS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1628 FMA3 :VFNMADD231SS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1629 FMA4 :VFNMADDPS xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1630 FMA3 :VFNMADD132PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1631 FMA3 :VFNMADD213PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1632 FMA3 :VFNMADD231PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1633 FMA4 :VFNMSUBSS xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1634 FMA3 :VFNMSUB132SS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1635 FMA3 :VFNMSUB213SS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1636 FMA3 :VFNMSUB231SS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1637 FMA4 :VFNMSUBPS xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1638 FMA3 :VFNMSUB132PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1639 FMA3 :VFNMSUB213PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1640 FMA3 :VFNMSUB231PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1641 FMA4 :VFMADDSUBPS xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1642 FMA3 :VFMADDSUB132PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1643 FMA3 :VFMADDSUB213PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1644 FMA3 :VFMADDSUB231PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1645 FMA4 
:VFMSUBADDPS xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1646 FMA3 :VFMSUBADD132PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1647 FMA3 :VFMSUBADD213PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1648 FMA3 :VFMSUBADD231PS xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1649 FMA4 :VFMADDSD xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1650 FMA3 :VFMADD132SD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1651 FMA3 :VFMADD213SD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1652 FMA3 :VFMADD231SD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1653 FMA4 :VFMADDPD xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1654 FMA3 :VFMADD132PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1655 FMA3 :VFMADD213PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1656 FMA3 :VFMADD231PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1657 FMA4 :VFMSUBSD xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1658 FMA3 :VFMSUB132SD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1659 FMA3 :VFMSUB213SD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1660 FMA3 :VFMSUB231SD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1661 FMA4 :VFMSUBPD xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1662 FMA3 :VFMSUB132PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1663 FMA3 :VFMSUB213PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1664 FMA3 :VFMSUB231PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1665 FMA4 :VFNMADDSD xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1666 FMA3 :VFNMADD132SD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1667 FMA3 :VFNMADD213SD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1668 FMA3 :VFNMADD231SD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1669 FMA4 :VFNMADDPD xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1670 FMA3 :VFNMADD132PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1671 FMA3 :VFNMADD213PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1672 FMA3 :VFNMADD231PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1673 FMA4 :VFNMSUBSD xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1674 
FMA3 :VFNMSUB132SD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1675 FMA3 :VFNMSUB213SD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1676 FMA3 :VFNMSUB231SD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1677 FMA4 :VFNMSUBPD xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1678 FMA3 :VFNMSUB132PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1679 FMA3 :VFNMSUB213PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1680 FMA3 :VFNMSUB231PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1681 FMA4 :VFMADDSUBPD xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1682 FMA3 :VFMADDSUB132PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1683 FMA3 :VFMADDSUB213PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1684 FMA3 :VFMADDSUB231PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1685 FMA4 :VFMSUBADDPD xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1686 FMA3 :VFMSUBADD132PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1687 FMA3 :VFMSUBADD213PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1688 FMA3 :VFMSUBADD231PD xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1689 XOP :VFRCZSS xmm, xmm L: 2.49ns= 10.0c T: 0.25ns= 1.00c 1690 XOP :VFRCZPS xmm, xmm L: 2.49ns= 10.0c T: 0.25ns= 1.00c 1691 XOP :VFRCZSD xmm, xmm L: 2.49ns= 10.0c T: 0.25ns= 1.00c 1692 XOP :VFRCZPD xmm, xmm L: 2.49ns= 10.0c T: 0.25ns= 1.00c 1693 XOP :VPCMOV xmm, xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1694 XOP :VPCOMB xmm, xmm, xmm, imm8 L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1695 XOP :VPCOMB xm1, xm1, xm2, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1696 XOP :VPCOMW xmm, xmm, xmm, imm8 L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1697 XOP :VPCOMW xm1, xm1, xm2, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1698 XOP :VPCOMD xmm, xmm, xmm, imm8 L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1699 XOP :VPCOMD xm1, xm1, xm2, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1700 XOP :VPCOMQ xmm, xmm, xmm, imm8 L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1701 XOP :VPCOMQ xm1, xm1, xm2, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1702 XOP :VPCOMUB xmm, xmm, xmm, imm8 L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1703 
XOP :VPCOMUB xm1, xm1, xm2, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1704 XOP :VPCOMUW xmm, xmm, xmm, imm8 L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1705 XOP :VPCOMUW xm1, xm1, xm2, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1706 XOP :VPCOMUD xmm, xmm, xmm, imm8 L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1707 XOP :VPCOMUD xm1, xm1, xm2, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1708 XOP :VPCOMUQ xmm, xmm, xmm, imm8 L: 0.12ns= 0.5c T: 0.12ns= 0.49c 1709 XOP :VPCOMUQ xm1, xm1, xm2, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1710 XOP :VPERMIL2PS xm,xm,xm,xm,im L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1711 XOP :VPERMIL2PD xm,xm,xm,xm,im L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1712 XOP :VPHADDBW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1713 XOP :VPHADDBD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1714 XOP :VPHADDBQ xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1715 XOP :VPHADDWD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1716 XOP :VPHADDWQ xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1717 XOP :VPHADDDQ xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1718 XOP :VPHADDUBW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1719 XOP :VPHADDUBD xmm, xmm L: 0.50ns= 2.0c T: 0.13ns= 0.50c 1720 XOP :VPHADDUBQ xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1721 XOP :VPHADDUWD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1722 XOP :VPHADDUWQ xmm, xmm L: 0.50ns= 2.0c T: 0.13ns= 0.50c 1723 XOP :VPHADDUDQ xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1724 XOP :VPHSUBBW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1725 XOP :VPHSUBWD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1726 XOP :VPHSUBDQ xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1727 XOP :VPMACSWW xmm,xmm,xmm,xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1728 XOP :VPMACSWW xm1,xm2,xm2,xm1 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1729 XOP :VPMACSWD xmm,xmm,xmm,xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1730 XOP :VPMACSWD xm1,xm2,xm2,xm1 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1731 XOP :VPMACSDD xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1732 XOP :VPMACSDD xm1,xm2,xm2,xm1 L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1733 XOP :VPMACSDQL xmm,xmm,xmm,xmm L: 
1.00ns= 4.0c T: 0.25ns= 1.00c 1734 XOP :VPMACSDQL xm1,xm2,xm2,xm1 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1735 XOP :VPMACSDQH xmm,xmm,xmm,xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1736 XOP :VPMACSDQH xm1,xm2,xm2,xm1 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1737 XOP :VPMACSSWW xmm,xmm,xmm,xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1738 XOP :VPMACSSWW xm1,xm2,xm2,xm1 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1739 XOP :VPMACSSWD xmm,xmm,xmm,xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1740 XOP :VPMACSSWD xm1,xm2,xm2,xm1 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1741 XOP :VPMACSSDD xmm,xmm,xmm,xmm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1742 XOP :VPMACSSDD xm1,xm2,xm2,xm1 L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1743 XOP :VPMACSSDQL xmm,xmm,xmm,xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1744 XOP :VPMACSSDQL xm1,xm2,xm2,xm1 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1745 XOP :VPMACSSDQH xmm,xmm,xmm,xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1746 XOP :VPMACSSDQH xm1,xm2,xm2,xm1 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1747 XOP :VPMADCSWD xmm,xmm,xmm,xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1748 XOP :VPMADCSWD xm1,xm2,xm2,xm1 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1749 XOP :VPMADCSSWD xmm,xmm,xmm,xmm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1750 XOP :VPMADCSSWD xm1,xm2,xm2,xm1 L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1751 XOP :VPPERM xmm, xmm, xmm, xmm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1752 XOP :VPROTB xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1753 XOP :VPROTB xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1754 XOP :VPROTW xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1755 XOP :VPROTW xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1756 XOP :VPROTD xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1757 XOP :VPROTD xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1758 XOP :VPROTQ xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1759 XOP :VPROTQ xmm, xmm, imm8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1760 XOP :VPSHAB xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1761 XOP :VPSHAW xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1762 XOP :VPSHAD xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1763 XOP 
:VPSHAQ xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1764 XOP :VPSHLB xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1765 XOP :VPSHLW xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1766 XOP :VPSHLD xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1767 XOP :VPSHLQ xmm, xmm, xmm L: 0.75ns= 3.0c T: 0.25ns= 1.00c 1768 F16C :VCVTPS2PH xmm, xmm, imm8 L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1769 F16C :VCVTPH2PS xmm, xmm L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1770 AVX :VMOVAPS ymm, ymm L: 0.50ns= 2.0c T: 0.15ns= 0.60c 1771 AVX :VMOVAPS ymm, [m256] L: [memory dep.] T: 0.25ns= 1.00c 1772 AVX :VMOVAPS [m256], ymm L: [memory dep.] T: 4.48ns= 18.00c 1773 AVX :VMOVAPS LS pair L: 4.73ns= 19.0c T: 4.73ns= 19.00c 1774 AVX :VMOVUPS ymm, ymm L: 0.50ns= 2.0c T: 0.15ns= 0.60c 1775 AVX :VMOVUPS ymm, [m256] L: [memory dep.] T: 0.25ns= 1.00c 1776 AVX :VMOVUPS [m256], ymm L: [memory dep.] T: 5.23ns= 21.00c 1777 AVX :VMOVUPS aligned LS pair L: 5.48ns= 22.0c T: 5.48ns= 22.00c 1778 AVX :VMOVUPS ymm, [m256 + 4] L: [memory dep.] T: 1.00ns= 4.00c 1779 AVX :VMOVUPS [m256 + 4], ymm L: [memory dep.] T: 5.73ns= 23.00c 1780 AVX :VMOVUPS unaligned LS pair L: 5.73ns= 23.0c T: 5.73ns= 23.00c 1781 AVX :VMOVSLDUP ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1782 AVX :VMOVSHDUP ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1783 AVX :VMOVNTPS [m256], ymm L: [memory dep.] T: 19.92ns= 19.92c 1784 AVX :VMOVMSKPS r32, ymm L: [diff. reg. set] T: 0.25ns= 1.00c 1785 AVX :VMASKMOVPS ymm,ymm,[m256+4] L: [memory dep.] T: 1.12ns= 4.50c 1786 AVX :VMASKMOVPS [m256+4],ymm,ymm L: [memory dep.] 
T: 46.90ns=188.25c 1787 AVX :VMASKMOVPS unaligned LSpair L: 51.32ns=206.0c T: 47.13ns=189.17c 1788 AVX :VUNPCKLPS ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1789 AVX :VUNPCKHPS ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1790 AVX :VSHUFPS ymm, ymm, ymm, imm8 L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1791 AVX :VPERMILPS ymm, ymm, ymm L: 0.75ns= 3.0c T: 0.50ns= 2.00c 1792 AVX :VPERMILPS ymm, ymm, imm8 L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1793 AVX :VCMPPS ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1794 AVX :VADDSUBPS ymm, ymm, ymm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 1795 AVX :VHSUBPS ymm, ymm, ymm L: 2.74ns= 11.0c T: 1.00ns= 4.00c 1796 AVX :VHADDPS ymm, ymm, ymm L: 2.74ns= 11.0c T: 1.00ns= 4.00c 1797 AVX :VSUBPS ymm, ymm, ymm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 1798 AVX :VADDPS ymm, ymm, ymm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 1799 AVX :VMULPS ymm, ymm, ymm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 1800 AVX :VMULPS+VADDPS ymm, ymm, ymm L: 2.49ns= 10.0c T: [not enough reg] 1801 AVX :VMULPS ymm1.. VADDPS ymm2.. 
L: 1.25ns= 5.0c T: [not enough reg] 1802 AVX :VMAXPS ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1803 AVX :VMINPS ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1804 AVX :VANDNPS ymm, ymm, ymm L: 0.25ns= 1.0c T: 0.25ns= 1.00c 1805 AVX :VANDNPS ymm1, ymm1, ymm2 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1806 AVX :VANDPS ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1807 AVX :VANDPS ymm1, ymm1, ymm2 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1808 AVX :VORPS ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1809 AVX :VORPS ymm1, ymm1, ymm2 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1810 AVX :VXORPS ymm, ymm, ymm L: 0.25ns= 1.0c T: 0.25ns= 1.00c 1811 AVX :VXORPS ymm1, ymm1, ymm2 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1812 AVX :VDIVPS ymm, ymm, ymm L: 5.98ns= 24.0c T: 4.73ns= 19.00c 1813 AVX :VDIVPS (0.0f/x) L: 2.24ns= 9.0c T: 2.24ns= 9.00c 1814 AVX :VDIVPS (x/1.0f) L: 2.24ns= 9.0c T: 2.24ns= 9.00c 1815 AVX :VDIVPS (x/2.0f) L: 2.24ns= 9.0c T: 2.24ns= 9.00c 1816 AVX :VDIVPS (x/0.5f) L: 2.24ns= 9.0c T: 2.24ns= 9.00c 1817 AVX :VSQRTPS ymm, ymm L: 7.23ns= 29.0c T: 5.98ns= 24.00c 1818 AVX :VSQRTPS (0.0f) L: 2.24ns= 9.0c T: 2.24ns= 9.00c 1819 AVX :VSQRTPS (1.0f) L: 2.24ns= 9.0c T: 2.24ns= 9.00c 1820 AVX :VRCPPS ymm, ymm, ymm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1821 AVX :VRSQRTPS ymm, ymm, ymm L: 1.25ns= 5.0c T: 0.50ns= 2.00c 1822 AVX :VBLENDPS ymm, ymm, ymm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1823 AVX :VBLENDVPS ymm, ymm, ymm, ym L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1824 AVX :VDPPS ymm, ymm, ymm, imm8 L: 6.73ns= 27.0c T: 2.78ns= 11.17c 1825 AVX :VPTESTPS ymm, ymm L: [no true dep.] T: 0.50ns= 2.00c 1826 AVX :VROUNDPS ymm, ymm, imm8 L: 1.00ns= 4.0c T: 0.50ns= 2.00c 1827 AVX :VMOVAPD ymm, ymm L: 0.50ns= 2.0c T: 0.15ns= 0.60c 1828 AVX :VMOVAPD ymm, [m256] L: [memory dep.] T: 0.25ns= 1.00c 1829 AVX :VMOVAPD [m256], ymm L: [memory dep.] T: 4.48ns= 18.00c 1830 AVX :VMOVAPD LS pair L: 4.73ns= 19.0c T: 4.73ns= 19.00c 1831 AVX :VMOVUPD ymm, ymm L: 0.50ns= 2.0c T: 0.15ns= 0.60c 1832 AVX :VMOVUPD ymm, [m256] L: [memory dep.] 
T: 0.25ns= 1.00c 1833 AVX :VMOVUPD [m256], ymm L: [memory dep.] T: 5.23ns= 21.00c 1834 AVX :VMOVUPD aligned LS pair L: 5.48ns= 22.0c T: 5.48ns= 22.00c 1835 AVX :VMOVUPD ymm, [m256 + 4] L: [memory dep.] T: 0.50ns= 2.00c 1836 AVX :VMOVUPD [m256 + 4], ymm L: [memory dep.] T: 5.73ns= 23.00c 1837 AVX :VMOVUPD unaligned LS pair L: 5.73ns= 23.0c T: 5.73ns= 23.00c 1838 AVX :VMOVDDUP ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1839 AVX :VMOVNTPD [m256], ymm L: [memory dep.] T: 19.92ns= 19.92c 1840 AVX :VMOVMSKPD r32, ymm L: [diff. reg. set] T: 0.25ns= 1.00c 1841 AVX :VMASKMOVPD ymm,ymm,[m256+4] L: [memory dep.] T: 0.75ns= 3.00c 1842 AVX :VMASKMOVPD [m256+4],ymm,ymm L: [memory dep.] T: 46.90ns=188.25c 1843 AVX :VMASKMOVPD unaligned LSpair L: 51.32ns=206.0c T: 47.13ns=189.17c 1844 AVX :VUNPCKLPD ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1845 AVX :VUNPCKHPD ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1846 AVX :VSHUFPD ymm, ymm, ymm, imm8 L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1847 AVX :VPERMILPD ymm, ymm, ymm L: 0.75ns= 3.0c T: 0.50ns= 2.00c 1848 AVX :VPERMILPD ymm, ymm, imm8 L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1849 AVX :VCMPPD ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1850 AVX :VADDSUBPD ymm, ymm, ymm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 1851 AVX :VHSUBPD ymm, ymm, ymm L: 2.74ns= 11.0c T: 1.00ns= 4.00c 1852 AVX :VHADDPD ymm, ymm, ymm L: 2.74ns= 11.0c T: 1.00ns= 4.00c 1853 AVX :VSUBPD ymm, ymm, ymm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 1854 AVX :VADDPD ymm, ymm, ymm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 1855 AVX :VMULPD ymm, ymm, ymm L: 1.25ns= 5.0c T: 0.25ns= 1.00c 1856 AVX :VMULPD+VADDPD ymm, ymm, ymm L: 2.49ns= 10.0c T: [not enough reg] 1857 AVX :VMULPD ymm1.. VADDPD ymm2.. 
L: 1.25ns= 5.0c T: [not enough reg] 1858 AVX :VMAXPD ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1859 AVX :VMINPD ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1860 AVX :VANDNPD ymm, ymm, ymm L: 0.25ns= 1.0c T: 0.25ns= 1.00c 1861 AVX :VANDNPD ymm1, ymm1, ymm2 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1862 AVX :VANDPD ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1863 AVX :VANDPD ymm1, ymm1, ymm2 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1864 AVX :VORPD ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1865 AVX :VORPD ymm1, ymm1, ymm2 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1866 AVX :VXORPD ymm, ymm, ymm L: 0.25ns= 1.0c T: 0.25ns= 1.00c 1867 AVX :VXORPD ymm1, ymm1, ymm2 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1868 AVX :VDIVPD ymm, ymm, ymm L: 6.73ns= 27.0c T: 4.48ns= 18.00c 1869 AVX :VDIVPD (0.0/x) L: 2.24ns= 9.0c T: 2.24ns= 9.00c 1870 AVX :VDIVPD (x/1.0) L: 2.24ns= 9.0c T: 2.24ns= 9.00c 1871 AVX :VDIVPD (x/2.0) L: 2.24ns= 9.0c T: 2.24ns= 9.00c 1872 AVX :VDIVPD (x/0.5) L: 2.24ns= 9.0c T: 2.24ns= 9.00c 1873 AVX :VSQRTPD ymm, ymm L: 9.47ns= 38.0c T: 7.23ns= 29.00c 1874 AVX :VSQRTPD (0.0) L: 2.24ns= 9.0c T: 2.24ns= 9.00c 1875 AVX :VSQRTPD (1.0) L: 2.24ns= 9.0c T: 2.24ns= 9.00c 1876 AVX :VBLENDPD ymm, ymm, ymm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1877 AVX :VBLENDVPD ymm, ymm, ymm, ym L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1878 AVX :VCVTDQ2PD ymm, xmm L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1879 AVX :VCVTPD2DQ xmm, ymm L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1880 AVX :VCVTPD2DQ + VCVTDQ2PD L: 3.74ns= 15.0c T: 0.93ns= 3.75c 1881 AVX :VCVTTPD2DQ xmm, ymm L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1882 AVX :VCVTTPD2DQ + VCVTDQ2PD L: 3.74ns= 15.0c T: 0.93ns= 3.75c 1883 AVX :VCVTDQ2PS ymm, ymm L: 1.00ns= 4.0c T: 0.50ns= 2.00c 1884 AVX :VCVTPS2DQ ymm, ymm L: 1.00ns= 4.0c T: 0.50ns= 2.00c 1885 AVX :VCVTPS2DQ + VCVTDQ2PS L: 1.99ns= 8.0c T: 1.00ns= 4.00c 1886 AVX :VCVTTPS2DQ ymm, ymm L: 1.00ns= 4.0c T: 0.50ns= 2.00c 1887 AVX :VCVTTPS2DQ + VCVTDQ2PS L: 1.99ns= 8.0c T: 1.00ns= 4.00c 1888 AVX :VCVTPS2PD ymm, xmm L: 1.99ns= 8.0c T: 0.50ns= 
2.00c 1889 AVX :VCVTPD2PS xmm, ymm L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1890 AVX :VCVTPD2PS + VCVTPS2PD L: 3.74ns= 15.0c T: 0.93ns= 3.75c 1891 AVX :VPTESTPD ymm, ymm L: [no true dep.] T: 0.50ns= 2.00c 1892 AVX :VROUNDPD ymm, ymm, imm8 L: 1.00ns= 4.0c T: 0.50ns= 2.00c 1893 AVX :VBROADCASTSS ymm, m32 L: [memory dep.] T: 0.15ns= 0.62c 1894 AVX :VBROADCASTSD ymm, m64 L: [memory dep.] T: 0.15ns= 0.62c 1895 AVX :VBROADCASTF128 ymm, m128 L: [memory dep.] T: 0.15ns= 0.62c 1896 AVX :VEXTRACTF128 ymm, ymm, imm8 L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1897 AVX :VINSERTF128 ym, ym, xm, im8 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1898 AVX :VPERM2F128 y1, y2, y3, imm L: 1.25ns= 5.0c T: 0.75ns= 3.00c 1899 AVX :VMOVDQA ymm, ymm L: 0.50ns= 2.0c T: 0.15ns= 0.60c 1900 AVX :VMOVDQA ymm, [m256] L: [memory dep.] T: 0.25ns= 1.00c 1901 AVX :VMOVDQA [m256], ymm L: [memory dep.] T: 4.48ns= 18.00c 1902 AVX :VMOVDQA LS pair L: 4.73ns= 19.0c T: 4.73ns= 19.00c 1903 AVX :VMOVDQU ymm, ymm L: 0.50ns= 2.0c T: 0.15ns= 0.60c 1904 AVX :VMOVDQU ymm, [m256] L: [memory dep.] T: 0.25ns= 1.00c 1905 AVX :VMOVDQU [m256], ymm L: [memory dep.] T: 5.23ns= 21.00c 1906 AVX :VMOVDQU aligned LS pair L: 5.48ns= 22.0c T: 5.48ns= 22.00c 1907 AVX :VMOVDQU ymm, [m256 + 4] L: [memory dep.] T: 1.02ns= 4.08c 1908 AVX :VMOVDQU [m256 + 4], ymm L: [memory dep.] T: 5.73ns= 23.00c 1909 AVX :VMOVDQU unaligned LS pair L: 5.73ns= 23.0c T: 5.73ns= 23.00c 1910 AVX :VMOVNTDQ [m256], ymm L: [memory dep.] T: 19.92ns= 19.92c 1911 AVX :VLDDQU ymm, [m256 + 4] L: [memory dep.] T: 0.71ns= 2.83c 1912 AVX :VZEROUPPER L: [no true dep.] T: 1.00ns= 4.00c 1913 AVX :VZEROALL L: [no true dep.] 
T: 1.49ns= 6.00c 1914 FMA4 :VFMADDPS ymm,ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1915 FMA3 :VFMADD132PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1916 FMA3 :VFMADD213PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1917 FMA3 :VFMADD231PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1918 FMA4 :VFMSUBPS ymm,ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1919 FMA3 :VFMSUB132PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1920 FMA3 :VFMSUB213PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1921 FMA3 :VFMSUB231PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1922 FMA4 :VFNMADDPS ymm,ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1923 FMA3 :VFNMADD132PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1924 FMA3 :VFNMADD213PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1925 FMA3 :VFNMADD231PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1926 FMA4 :VFNMSUBPS ymm,ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1927 FMA3 :VFNMSUB132PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1928 FMA3 :VFNMSUB213PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1929 FMA3 :VFNMSUB231PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1930 FMA4 :VFMADDSUBPS ymm,ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1931 FMA3 :VFMADDSUB132PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1932 FMA3 :VFMADDSUB213PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1933 FMA3 :VFMADDSUB231PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1934 FMA4 :VFMSUBADDPS ymm,ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1935 FMA3 :VFMSUBADD132PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1936 FMA3 :VFMSUBADD213PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1937 FMA3 :VFMSUBADD231PS ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1938 FMA4 :VFMADDPD ymm,ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1939 FMA3 :VFMADD132PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1940 FMA3 :VFMADD213PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1941 FMA3 :VFMADD231PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1942 FMA4 :VFMSUBPD ymm,ymm,ymm,ymm L: 
1.25ns= 5.0c T: 0.27ns= 1.08c 1943 FMA3 :VFMSUB132PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1944 FMA3 :VFMSUB213PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1945 FMA3 :VFMSUB231PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1946 FMA4 :VFNMADDPD ymm,ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1947 FMA3 :VFNMADD132PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1948 FMA3 :VFNMADD213PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1949 FMA3 :VFNMADD231PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1950 FMA4 :VFNMSUBPD ymm,ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1951 FMA3 :VFNMSUB132PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1952 FMA3 :VFNMSUB213PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1953 FMA3 :VFNMSUB231PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1954 FMA4 :VFMADDSUBPD ymm,ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1955 FMA3 :VFMADDSUB132PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1956 FMA3 :VFMADDSUB213PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1957 FMA3 :VFMADDSUB231PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1958 FMA4 :VFMSUBADDPD ymm,ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1959 FMA3 :VFMSUBADD132PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1960 FMA3 :VFMSUBADD213PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1961 FMA3 :VFMSUBADD231PD ymm,ymm,ymm L: 1.25ns= 5.0c T: 0.27ns= 1.08c 1962 XOP :VFRCZSD ymm, ymm L: 2.49ns= 10.0c T: 0.50ns= 2.00c 1963 XOP :VFRCZPD ymm, ymm L: 2.49ns= 10.0c T: 0.50ns= 2.00c 1964 XOP :VPCMOV ymm, ymm, ymm, ymm L: 0.50ns= 2.0c T: 0.50ns= 2.00c 1965 XOP :VPERMIL2PS ym,ym,ym,ym,im L: 0.75ns= 3.0c T: 0.50ns= 2.00c 1966 XOP :VPERMIL2PD ym,ym,ym,ym,im L: 0.75ns= 3.0c T: 0.50ns= 2.00c 1967 F16C :VCVTPS2PH + VCVTPH2PS L: 1.99ns= 8.0c T: 1.00ns= 4.00c 1968 F16C :VCVTPS2PH xmm, ymm, imm8 L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1969 F16C :VCVTPH2PS ymm, xmm L: 1.99ns= 8.0c T: 0.50ns= 2.00c 1970 F16C :VCVTPS2PH + VCVTPH2PS L: 1.99ns= 8.0c T: 1.00ns= 4.00c 1974 X86 :MOV+ADD r8, r8 L: 0.50ns= 2.0c T: 
0.25ns= 1.00c 1975 X86 :MOV+ADD r16, r16 L: 0.50ns= 2.0c T: 0.25ns= 1.00c 1976 X86 :MOV+ADD r32, r32 L: 0.50ns= 2.0c T: 0.18ns= 0.74c 1978 MMX :MOVQ+PADDB mm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1979 MMX :MOVQ+PADDW mm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1980 MMX :MOVQ+PADDD mm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1981 SSE2 :MOVQ+PADDQ mm, mm L: 1.00ns= 4.0c T: 0.25ns= 1.00c 1983 SSE :MOVSS+ADDSS xmm, xmm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1984 AVX :VMOVSS+VADDSS xm, xm, xm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1985 SSE :MOVAPS+ADDPS xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1986 AVX :VMOVAPS+VADDPS xm, xm, xm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1987 SSE2 :MOVSD+ADDSD xmm, xmm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1988 AVX :VMOVSD+VADDSD xm, xm, xm L: 1.99ns= 8.0c T: 0.25ns= 1.00c 1989 SSE2 :MOVAPD+ADDPD xmm, xmm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1990 AVX :VMOVAPD+VADDPD xm, xm, xm L: 1.25ns= 5.0c T: 0.16ns= 0.63c 1991 SSE2 :MOVDQA+PADDB xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1992 SSE2 :MOVDQA+PADDW xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1993 SSE2 :MOVDQA+PADDD xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1994 SSE2 :MOVDQA+PADDQ xmm, xmm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1995 AVX :VMOVDQA+VPADDB xm, xm, xm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1996 AVX :VMOVDQA+VPADDW xm, xm, xm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1997 AVX :VMOVDQA+VPADDD xm, xm, xm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1998 AVX :VMOVDQA+VPADDQ xm, xm, xm L: 0.50ns= 2.0c T: 0.12ns= 0.50c 1999 AVX :VMOVAPS+VADDPS ym, ym, ym L: 2.49ns= 10.0c T: 0.25ns= 1.00c 2000 AVX :VMOVAPD+VADDPD ym, ym, ym L: 2.49ns= 10.0c T: 0.25ns= 1.00c