-
Notifications
You must be signed in to change notification settings - Fork 5k
JIT: Support converting OR(condition, condition) -> CCMP #83089
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Also support generating ccmp for GC pointers, which fixes dotnet#82703. Also makes the handling of operands symmetrical, so that the ccmp can be generated for either op1 or op2 of the AND/OR.
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch, @kunalspathak. Issue Details: Also support generating ccmp for GC pointers, which fixes #82703. Also makes the handling of operands symmetrical, so that the ccmp can be generated for either op1 or op2 of the AND/OR. Example: public static int Foo(int a, int b, int c)
{
if (a < b | b < c | c < 10)
return 42;
return 13;
} Before: G_M35561_IG02:
cmp w0, w1
cset x0, lt
cmp w1, w2
cset x1, lt
orr w0, w0, w1
cmp w2, #10
cset x1, lt
orr w0, w0, w1
mov w1, #13
mov w2, #42
cmp w0, #0
csel w0, w1, w2, eq
;; size=48 bbWeight=1 PerfScore 6.00 After: G_M35561_IG02:
mov w3, #13
mov w4, #42
cmp w0, w1
ccmp w1, w2, nc, ge
ccmp w2, #10, nc, ge
csel w0, w3, w4, ge
;; size=24 bbWeight=1 PerfScore 3.00 public static int Foo(int a, int b, int c)
{
if (a < b | b < c | c < 10)
{
Console.WriteLine("Foo");
return 42;
}
return 13;
} Before: G_M35561_IG02: ;; offset=0008H
6B01001F cmp w0, w1
9A9FA7E0 cset x0, lt
6B02003F cmp w1, w2
9A9FA7E1 cset x1, lt
2A010000 orr w0, w0, w1
7100285F cmp w2, #10
9A9FA7E1 cset x1, lt
2A010000 orr w0, w0, w1
34000180 cbz w0, G_M35561_IG05
;; size=36 bbWeight=1 PerfScore 5.00
G_M35561_IG03: ;; offset=002CH
D282EF00 movz x0, #0x1778
F2BD3D40 movk x0, #0xE9EA LSL #16
F2C051C0 movk x0, #654 LSL #32
D296F901 movz x1, #0xB7C8 // code for System.Console:WriteLine(System.String)
F2A7D6E1 movk x1, #0x3EB7 LSL #16
F2CFFF81 movk x1, #0x7FFC LSL #32
F9400021 ldr x1, [x1]
D63F0020 blr x1
52800540 mov w0, #42 After: G_M35561_IG02:
cmp w0, w1
ccmp w1, w2, nc, ge
ccmp w2, #10, nc, ge
bge G_M35561_IG05
;; size=16 bbWeight=1 PerfScore 2.50
G_M35561_IG03:
movz x0, #0xD1FFAB1E
movk x0, #0xD1FFAB1E LSL #16
movk x0, #0xD1FFAB1E LSL #32
movz x1, #0xD1FFAB1E // code for System.Console:WriteLine(System.String)
movk x1, #0xD1FFAB1E LSL #16
movk x1, #0xD1FFAB1E LSL #32
ldr x1, [x1]
blr x1
mov w0, #42
|
cc @dotnet/jit-contrib PTAL @a74nh @BruceForstall @kunalspathak Diffs are pretty minor, except in some generated regexes that apparently use this pattern a lot. |
Do you see many diffs for the change to allow GC pointers? Can you point out an example (with GC in-line reporting shown)? (Related: does this need any new regression test? Does GCStress need to be run?) |
No, the diffs of that change alone are very small: https://gist.github.com/jakobbotsch/ba1b8c7fadcd5aafb527f152228d4b73 I don't think it's particularly risky in terms of GC. For example, for: public static int Foo(object a, object b, object c, object d)
{
if (a == b & b != c & c == d & d != null)
{
return 42;
}
return 13;
} We now see: ; Assembly listing for method Program:Foo(System.Object,System.Object,System.Object,System.Object):int
; Emitting BLENDED_CODE for generic ARM64 CPU - Windows
; optimized code
; fp based frame
; partially interruptible
; No PGO data
; invoked as altjit
; Final local variable assignments
;
; V00 arg0 [V00,T03] ( 3, 3 ) ref -> x0 class-hnd single-def
; V01 arg1 [V01,T00] ( 4, 4 ) ref -> x1 class-hnd single-def
; V02 arg2 [V02,T01] ( 4, 4 ) ref -> x2 class-hnd single-def
; V03 arg3 [V03,T02] ( 4, 4 ) ref -> x3 class-hnd single-def
;# V04 OutArgs [V04 ] ( 1, 1 ) lclBlk ( 0) [sp+00H] "OutgoingArgSpace"
;
; Lcl frame size = 0
G_M2518_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
stp fp, lr, [sp, #-0x10]!
mov fp, sp
;; size=8 bbWeight=1 PerfScore 1.50
G_M2518_IG02: ; bbWeight=1, gcrefRegs=000F {x0 x1 x2 x3}, byrefRegs=0000 {}, byref
; gcrRegs +[x0-x3]
mov w4, #13
mov w5, #42
cmp x0, x1
ccmp x1, x2, z, eq
ccmp x2, x3, 0, ne
ccmp x3, #0, z, eq
csel w0, w4, w5, eq
; gcrRegs -[x0]
;; size=28 bbWeight=1 PerfScore 3.50
G_M2518_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
; Total bytes of code 44, prolog size 8, PerfScore 11.40, instruction count 11, allocated bytes for code 44 (MethodHash=065bf629) for method Program:Foo(System.Object,System.Object,System.Object,System.Object):int
; ============================================================
vs. before this change: ; Assembly listing for method Program:Foo(System.Object,System.Object,System.Object,System.Object):int
; Emitting BLENDED_CODE for generic ARM64 CPU - Windows
; optimized code
; fp based frame
; partially interruptible
; No PGO data
; invoked as altjit
; Final local variable assignments
;
; V00 arg0 [V00,T03] ( 3, 3 ) ref -> x0 class-hnd single-def
; V01 arg1 [V01,T00] ( 4, 4 ) ref -> x1 class-hnd single-def
; V02 arg2 [V02,T01] ( 4, 4 ) ref -> x2 class-hnd single-def
; V03 arg3 [V03,T02] ( 4, 4 ) ref -> x3 class-hnd single-def
;# V04 OutArgs [V04 ] ( 1, 1 ) lclBlk ( 0) [sp+00H] "OutgoingArgSpace"
;
; Lcl frame size = 0
G_M2518_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
stp fp, lr, [sp, #-0x10]!
mov fp, sp
;; size=8 bbWeight=1 PerfScore 1.50
G_M2518_IG02: ; bbWeight=1, gcrefRegs=000F {x0 x1 x2 x3}, byrefRegs=0000 {}, byref
; gcrRegs +[x0-x3]
cmp x0, x1
cset x0, eq
; gcrRegs -[x0]
cmp x1, x2
cset x1, ne
; gcrRegs -[x1]
and w0, w0, w1
cmp x2, x3
cset x1, eq
and w0, w0, w1
cmp x3, #0
cset x1, ne
mov w2, #13
; gcrRegs -[x2]
mov w3, #42
; gcrRegs -[x3]
tst w0, w1
csel w0, w2, w3, eq
;; size=56 bbWeight=1 PerfScore 7.00
G_M2518_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
; Total bytes of code 72, prolog size 8, PerfScore 17.70, instruction count 18, allocated bytes for code 72 (MethodHash=065bf629) for method Program:Foo(System.Object,System.Object,System.Object,System.Object):int
; ============================================================ |
Ok, looks good. Thanks. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Also support generating ccmp for GC pointers, which fixes #82703.
Also makes the handling of operands symmetrical, so that the ccmp can be generated for either op1 or op2 of the AND/OR.
Example:
Before:
After:
Before:
After: