private://user_outputs/1/3631/task1_AI-UPV_2.tsv_en.tsv

Evaluation
EXIST 2022 T1 EN
Evall log
				{"files":{"EXIST2022_test_GOLD_EvALL_Task1_en.tsv":{"description":"The file is correctly parser without errors or warnings.\\nFile name: EXIST2022_test_GOLD_EvALL_Task1_en.tsv.","errors":{},"gold":true,"name":"EXIST2022_test_GOLD_EvALL_Task1_en.tsv","status":"OK"},"task1_AI-UPV_2.tsv_en.tsv":{"description":"The file is correctly parser without errors or warnings.\\nFile name: task1_AI-UPV_2.tsv_en.tsv.","errors":{},"gold":false,"name":"task1_AI-UPV_2.tsv_en.tsv","status":"OK"}},"metrics":{"Accuracy":{"acronym":"Acc","description":"Coming soon!","name":"Accuracy","results":{"average_per_test_case":0.7692307692307693,"test_cases":[{"average":0.7692307692307693,"name":"EXIST2022"}]},"status":"OK"},"CrossEntropy":{"acronym":"CE","description":"Coming soon!\\nThe evaluation FAIL.","name":"Cross Entropy","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: Cross Entropy.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"DCG":{"acronym":"DCG","description":"Coming soon!\\nThe evaluation FAIL.","name":"Discounted Cumulative Gain","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: Discounted Cumulative Gain.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"FMeasure":{"acronym":"F1","description":"Coming soon!","name":"F-Measure","results":{"average_per_test_case":0.7679917904787399,"test_cases":[{"average":0.7679917904787399,"classes":{"non-sexist":0.7849462365591396,"sexist":0.7510373443983402},"name":"EXIST2022"}]},"status":"OK"},"ICM":{"acronym":"ICM","description":"Coming soon!","name":"Information Contrast model","results":{"average_per_test_case":0.2960100162442705,"test_cases":[{"average":0.2960100162442705,"name":"EXIST2022"}]},"status":"OK"},"ICMNorm":{"acronym":"ICM-Norm","description":"Coming soon!","name":"Normalized Information Contrast Model","results":{"average_per_test_case":0.6512906799857437,"test_cases":[{"average":0.6512906799857437,"name":"EXIST2022"}]},"status":"OK"},"ICMSoft":{"acronym":"ICM-Soft","description":"Coming soon!\\nThe evaluation FAIL.","name":"Information Contrast Model Soft","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: Information Contrast Model Soft.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"ICMSoftNorm":{"acronym":"ICM-Soft-Norm","description":"Coming soon!\\nThe evaluation FAIL.","name":"Normalized Information Contrast Model Soft","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: Normalized Information Contrast Model Soft.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"Kappa":{"acronym":"Kappa","description":"Coming soon!","name":"Cohen's Kappa","results":{"average_per_test_case":0.5406022233674448,"test_cases":[{"average":0.5406022233674448,"name":"EXIST2022"}]},"status":"OK"},"MainReciprocalRank":{"acronym":"UNKNOWN","description":"UNKNOWN\\nThe evaluation FAIL.","errors":[{"description":"The selected metric does not exist.\\nThe metric name is: MainReciprocalRank.","name":"METRIC_UNKONW_METRIC_ERROR"}],"name":"MainReciprocalRank","status":"FAIL"},"MeanAveragePrecision":{"acronym":"UNKNOWN","description":"UNKNOWN\\nThe evaluation FAIL.","errors":[{"description":"The selected metric does not exist.\\nThe metric name is: MeanAveragePrecision.","name":"METRIC_UNKONW_METRIC_ERROR"}],"name":"MeanAveragePrecision","status":"FAIL"},"Precision":{"acronym":"Pr","description":"Coming soon!","name":"Precision","results":{"average_per_test_case":0.7717576349720952,"test_cases":[{"average":0.7717576349720952,"classes":{"non-sexist":0.8656126482213439,"sexist":0.6779026217228464},"name":"EXIST2022"}]},"status":"OK"},"PrecisionAtK":{"acronym":"P@k","description":"Coming soon!\\nThe evaluation FAIL.","name":"Precision at k","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: Precision at k.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"RPrecision":{"acronym":"RPre.","description":"Coming soon!\\nThe evaluation FAIL.","name":"R Precision","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: R Precision.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"Recall":{"acronym":"Re","description":"Coming soon!","name":"Recall","results":{"average_per_test_case":0.7799466260007625,"test_cases":[{"average":0.7799466260007625,"classes":{"non-sexist":0.7180327868852459,"sexist":0.8418604651162791},"name":"EXIST2022"}]},"status":"OK"},"SystemPrecision":{"acronym":"SP","description":"Coming soon!\\nThe evaluation FAIL.","name":"System Precision","preconditions":{"METRIC_PRECONDITION_DIFFERENT_ITEMS_IN_GOLD_AND_PRED_ERROR":{"description":"The selected metric cannot be evaluated because the gold and predictions have the same number of items.\\nThe metric name is: System Precision.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_DIFFERENT_ITEMS_IN_GOLD_AND_PRED_ERROR","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"nDCG":{"acronym":"nDCG","description":"Coming soon!\\nThe evaluation FAIL.","name":"Normalized Discounted Cumulative Gain","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: Normalized Discounted Cumulative Gain.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"}}}
ICM Norm
0.65
Sistema
AI-UPV_2
Partición resultados
All
Tarea
EXIST 2022: Sexism detection (EN)
Fuente
Publicación
Precisión
0.77
Recall
0.78
F1
0.77
Accuracy
0.77
ICM
0.30
Kappa
0.54