TASK |
DATASET |
MODEL |
METRIC NAME |
METRIC VALUE |
GLOBAL RANK |
REMOVE |
Natural Language Inference
|
ANLI test
|
PaLM 540B (Self Consistency)
|
A2
|
64.5
|
# 5
|
|
Natural Language Inference
|
ANLI test
|
PaLM 540B (Self Consistency)
|
A3
|
63.4
|
# 6
|
|
Natural Language Inference
|
ANLI test
|
PaLM 540B (Self Improvement, Standard-Prompting)
|
A2
|
64.8
|
# 4
|
|
Natural Language Inference
|
ANLI test
|
PaLM 540B (Self Improvement, Standard-Prompting)
|
A3
|
66.9
|
# 5
|
|
Natural Language Inference
|
ANLI test
|
PaLM 540B (Self Improvement, CoT Prompting)
|
A2
|
65.3
|
# 3
|
|
Natural Language Inference
|
ANLI test
|
PaLM 540B (Self Improvement, CoT Prompting)
|
A3
|
67.3
|
# 3
|
|
Natural Language Inference
|
ANLI test
|
PaLM 540B (Self Improvement, Self Consistency)
|
A2
|
66.5
|
# 2
|
|
Natural Language Inference
|
ANLI test
|
PaLM 540B (Self Improvement, Self Consistency)
|
A3
|
67.9
|
# 2
|
|
Natural Language Inference
|
ANLI test
|
PaLM 540B (Standard-Prompting)
|
A2
|
55.8
|
# 9
|
|
Natural Language Inference
|
ANLI test
|
PaLM 540B (Standard-Prompting)
|
A3
|
55.8
|
# 9
|
|
Natural Language Inference
|
ANLI test
|
PaLM 540B (CoT Prompting)
|
A2
|
58.9
|
# 8
|
|
Natural Language Inference
|
ANLI test
|
PaLM 540B (CoT Prompting)
|
A3
|
60.6
|
# 7
|
|
Common Sense Reasoning
|
ARC (Challenge)
|
PaLM 540B (Self Consistency)
|
Accuracy
|
88.7
|
# 6
|
|
Common Sense Reasoning
|
ARC (Challenge)
|
PaLM 540B (CoT Prompting)
|
Accuracy
|
85.2
|
# 12
|
|
Common Sense Reasoning
|
ARC (Challenge)
|
PaLM 540B (Self Improvement, Self Consistency)
|
Accuracy
|
89.8
|
# 5
|
|
Common Sense Reasoning
|
ARC (Challenge)
|
PaLM 540B (Standard-Prompting)
|
Accuracy
|
87.1
|
# 9
|
|
Common Sense Reasoning
|
ARC (Challenge)
|
PaLM 540B (Self Improvement, Standard-Prompting)
|
Accuracy
|
87.2
|
# 8
|
|
Common Sense Reasoning
|
ARC (Challenge)
|
PaLM 540B (Self Improvement, CoT Prompting)
|
Accuracy
|
88.3
|
# 7
|
|
Question Answering
|
DROP
|
PaLM 540B (Standard-Prompting)
|
Accuracy
|
60
|
# 6
|
|
Question Answering
|
DROP
|
PaLM 540B (Self Improvement, Self Consistency)
|
Accuracy
|
83
|
# 1
|
|
Question Answering
|
DROP
|
PaLM 540B (Self Improvement, CoT Prompting)
|
Accuracy
|
76.2
|
# 3
|
|
Question Answering
|
DROP
|
PaLM 540B (Self Improvement, Standard-Prompting)
|
Accuracy
|
71.7
|
# 4
|
|
Question Answering
|
DROP
|
PaLM 540B (Self Consistency)
|
Accuracy
|
78.2
|
# 2
|
|
Question Answering
|
DROP
|
PaLM 540B (CoT Prompting)
|
Accuracy
|
70.6
|
# 5
|
|
Arithmetic Reasoning
|
GSM8K
|
PaLM 540B (Standard-Prompting)
|
Accuracy
|
17.9
|
# 142
|
|
Arithmetic Reasoning
|
GSM8K
|
PaLM 540B (Standard-Prompting)
|
Parameters (Billion)
|
540
|
# 111
|
|
Arithmetic Reasoning
|
GSM8K
|
PaLM 540B (Self Improvement, Self Consistency)
|
Accuracy
|
82.1
|
# 56
|
|
Arithmetic Reasoning
|
GSM8K
|
PaLM 540B (Self Improvement, Self Consistency)
|
Parameters (Billion)
|
540
|
# 111
|
|
Arithmetic Reasoning
|
GSM8K
|
PaLM 540B (Self Improvement, CoT Prompting)
|
Accuracy
|
73.5
|
# 87
|
|
Arithmetic Reasoning
|
GSM8K
|
PaLM 540B (Self Improvement, CoT Prompting)
|
Parameters (Billion)
|
540
|
# 111
|
|
Arithmetic Reasoning
|
GSM8K
|
PaLM 540B (Self Improvement, Standard-Prompting)
|
Accuracy
|
32.2
|
# 135
|
|
Arithmetic Reasoning
|
GSM8K
|
PaLM 540B (Self Improvement, Standard-Prompting)
|
Parameters (Billion)
|
540
|
# 111
|
|
Arithmetic Reasoning
|
GSM8K
|
PaLM 540B (Self Consistency)
|
Accuracy
|
74.4
|
# 79
|
|
Arithmetic Reasoning
|
GSM8K
|
PaLM 540B (Self Consistency)
|
Parameters (Billion)
|
540
|
# 111
|
|
Arithmetic Reasoning
|
GSM8K
|
PaLM 540B (CoT Prompting)
|
Accuracy
|
56.5
|
# 113
|
|
Arithmetic Reasoning
|
GSM8K
|
PaLM 540B (CoT Prompting)
|
Parameters (Billion)
|
540
|
# 111
|
|
Question Answering
|
OpenBookQA
|
PaLM 540B (Self Improvement, Self Consistency)
|
Accuracy
|
94.4
|
# 3
|
|
Question Answering
|
OpenBookQA
|
PaLM 540B (Standard-Prompting)
|
Accuracy
|
84.4
|
# 15
|
|
Question Answering
|
OpenBookQA
|
PaLM 540B (CoT Prompting)
|
Accuracy
|
86.4
|
# 14
|
|
Question Answering
|
OpenBookQA
|
PaLM 540B (Self Consistency)
|
Accuracy
|
90
|
# 8
|
|
Question Answering
|
OpenBookQA
|
PaLM 540B (Self Improvement, Standard-Prompting)
|
Accuracy
|
92
|
# 6
|
|
Question Answering
|
OpenBookQA
|
PaLM 540B (Self Improvement, CoT Prompting)
|
Accuracy
|
93
|
# 5
|
|