Add subtask max score mode in core CMS

stefano-maggiolo · stefano-maggiolo · commit e324d2513fd1 · 2018-10-01T08:40:34.000+01:00
diff --git a/cms/db/task.py b/cms/db/task.py
@@ -3,7 +3,7 @@
 
 # Contest Management System - http://cms-dev.github.io/
 # Copyright © 2010-2014 Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
-# Copyright © 2010-2014 Stefano Maggiolo <s.maggiolo@gmail.com>
+# Copyright © 2010-2018 Stefano Maggiolo <s.maggiolo@gmail.com>
 # Copyright © 2010-2012 Matteo Boscariol <boscarim@hotmail.com>
 # Copyright © 2012-2018 Luca Wehrstedt <luca.wehrstedt@gmail.com>
 # Copyright © 2013 Bernard Blackham <bernard@largestprime.net>
@@ -46,7 +46,8 @@
 
 from cms import TOKEN_MODE_DISABLED, TOKEN_MODE_FINITE, TOKEN_MODE_INFINITE, \
     FEEDBACK_LEVEL_FULL, FEEDBACK_LEVEL_RESTRICTED
-from cmscommon.constants import SCORE_MODE_MAX, SCORE_MODE_MAX_TOKENED_LAST
+from cmscommon.constants import \
+    SCORE_MODE_MAX, SCORE_MODE_MAX_SUBTASK, SCORE_MODE_MAX_TOKENED_LAST
 
 from . import Codename, Filename, FilenameSchemaArray, Digest, Base, Contest
 
@@ -216,7 +217,9 @@ class Task(Base):
 
     # Score mode for the task.
     score_mode = Column(
-        Enum(SCORE_MODE_MAX_TOKENED_LAST, SCORE_MODE_MAX,
+        Enum(SCORE_MODE_MAX_TOKENED_LAST,
+             SCORE_MODE_MAX,
+             SCORE_MODE_MAX_SUBTASK,
              name="score_mode"),
         nullable=False,
         default=SCORE_MODE_MAX_TOKENED_LAST)
diff --git a/cms/grading/scoring.py b/cms/grading/scoring.py
@@ -30,13 +30,15 @@
 from __future__ import unicode_literals
 from future.builtins.disabled import *  # noqa
 from future.builtins import *  # noqa
+from six import iteritems, itervalues
 
 from collections import namedtuple
 
 from sqlalchemy.orm import joinedload
 
 from cms.db import Submission
-from cmscommon.constants import SCORE_MODE_MAX, SCORE_MODE_MAX_TOKENED_LAST
+from cmscommon.constants import \
+    SCORE_MODE_MAX, SCORE_MODE_MAX_SUBTASK, SCORE_MODE_MAX_TOKENED_LAST
 
 
 __all__ = [
@@ -137,6 +139,8 @@ def task_score(participation, task):
 
     if task.score_mode == SCORE_MODE_MAX:
         return _task_score_max(submissions_and_results)
+    if task.score_mode == SCORE_MODE_MAX_SUBTASK:
+        return _task_score_max_subtask(submissions_and_results)
     elif task.score_mode == SCORE_MODE_MAX_TOKENED_LAST:
         return _task_score_max_tokened_last(submissions_and_results)
     else:
@@ -180,6 +184,58 @@ def _task_score_max_tokened_last(submissions_and_results):
     return max(last_score, max_tokened_score), partial
 
 
+def _task_score_max_subtask(submissions_and_results):
+    """Compute score using the "max subtask" score mode.
+
+    This has been used in IOI since 2017. The score of a participant on a
+    task is the sum, over the subtasks, of the maximum score amongst all
+    submissions for that subtask (not yet computed scores count as 0.0).
+
+    The task's score type should be a child of ScoreTypeGroup, or follow
+    the same format for its score details (a list of dicts with "idx",
+    "score_fraction" and "max_score" keys). If this is not true, the score
+    mode will work as if the task had a single subtask.
+
+    submissions_and_results ([(Submission, SubmissionResult|None)]): list of
+        all submissions and their results for the participant on the task (on
+        the dataset of interest); result is None if not available (that is,
+        if the submission has not been compiled).
+
+    return ((float, bool)): (score, partial), same as task_score().
+
+    """
+    # Best score seen so far for each subtask, keyed by the subtask's idx.
+    max_scores = {}
+
+    partial = False
+    for _, sr in submissions_and_results:
+        if sr is None or not sr.scored():
+            partial = True  # Result missing or not scored yet.
+            continue
+
+        if sr.score_details == [] and sr.score == 0.0:
+            # Empty details and zero score: did not compile, ignore it.
+            continue
+
+        try:
+            subtask_scores = dict(
+                (subtask["idx"],
+                 subtask["score_fraction"] * subtask["max_score"])
+                for subtask in sr.score_details
+            )
+        except Exception:  # Details are not in the group format.
+            subtask_scores = None
+
+        if subtask_scores is None or len(subtask_scores) == 0:
+            # No usable per-subtask details: assume a single subtask (idx 1).
+            subtask_scores = {1: sr.score}
+
+        for idx, score in iteritems(subtask_scores):
+            max_scores[idx] = max(max_scores.get(idx, 0.0), score)
+
+    return sum(itervalues(max_scores)), partial
+
+
 def _task_score_max(submissions_and_results):
     """Compute score using the "max" score mode.
 
diff --git a/cms/server/admin/templates/task.html b/cms/server/admin/templates/task.html
@@ -257,7 +257,8 @@ <h2 id="title_task_configuration" class="toggling_on">Task configuration</h2>
         <td>
           <select name="score_mode">
             <option value="{{ SCORE_MODE_MAX_TOKENED_LAST }}" {{ "selected" if task.score_mode == SCORE_MODE_MAX_TOKENED_LAST else "" }}>Use best among tokened and last submissions (IOI 2010-2012)</option>
-            <option value="{{ SCORE_MODE_MAX }}" {{ "selected" if task.score_mode == SCORE_MODE_MAX else "" }}>Use best among all submissions (IOI 2013-)</option>
+            <option value="{{ SCORE_MODE_MAX }}" {{ "selected" if task.score_mode == SCORE_MODE_MAX else "" }}>Use best among all submissions (IOI 2013-2016)</option>
+            <option value="{{ SCORE_MODE_MAX_SUBTASK }}" {{ "selected" if task.score_mode == SCORE_MODE_MAX_SUBTASK else "" }}>Use the sum over each subtask of the best result for that subtask across all submissions (IOI 2017-)</option>
           </select>
         </td>
       </tr>
diff --git a/cms/server/jinja2_toolbox.py b/cms/server/jinja2_toolbox.py
@@ -45,7 +45,8 @@
 from cms.grading import format_status_text
 from cms.grading.languagemanager import get_language
 from cms.locale import DEFAULT_TRANSLATION
-from cmscommon.constants import SCORE_MODE_MAX_TOKENED_LAST, SCORE_MODE_MAX
+from cmscommon.constants import \
+    SCORE_MODE_MAX, SCORE_MODE_MAX_SUBTASK, SCORE_MODE_MAX_TOKENED_LAST
 
 
 @contextfilter
@@ -155,8 +156,9 @@ def instrument_generic_toolbox(env):
     env.globals["SubmissionResult"] = SubmissionResult
     env.globals["UserTestResult"] = UserTestResult
 
-    env.globals["SCORE_MODE_MAX"] = SCORE_MODE_MAX
     env.globals["SCORE_MODE_MAX_TOKENED_LAST"] = SCORE_MODE_MAX_TOKENED_LAST
+    env.globals["SCORE_MODE_MAX"] = SCORE_MODE_MAX
+    env.globals["SCORE_MODE_MAX_SUBTASK"] = SCORE_MODE_MAX_SUBTASK
 
     env.globals["TOKEN_MODE_DISABLED"] = TOKEN_MODE_DISABLED
     env.globals["TOKEN_MODE_FINITE"] = TOKEN_MODE_FINITE
diff --git a/cmscommon/constants.py b/cmscommon/constants.py
@@ -29,5 +29,7 @@
 
 # Maximum score amongst all submissions.
 SCORE_MODE_MAX = "max"
+# Sum of maximum score for each subtask over all submissions.
+SCORE_MODE_MAX_SUBTASK = "max_subtask"
 # Maximum score among all tokened submissions and the last submission.
 SCORE_MODE_MAX_TOKENED_LAST = "max_tokened_last"
diff --git a/cmscontrib/loaders/italy_yaml.py b/cmscontrib/loaders/italy_yaml.py
@@ -3,7 +3,7 @@
 
 # Contest Management System - http://cms-dev.github.io/
 # Copyright © 2010-2014 Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
-# Copyright © 2010-2017 Stefano Maggiolo <s.maggiolo@gmail.com>
+# Copyright © 2010-2018 Stefano Maggiolo <s.maggiolo@gmail.com>
 # Copyright © 2010-2012 Matteo Boscariol <boscarim@hotmail.com>
 # Copyright © 2013-2018 Luca Wehrstedt <luca.wehrstedt@gmail.com>
 # Copyright © 2014-2018 William Di Luigi <williamdiluigi@gmail.com>
@@ -43,7 +43,8 @@
 from cms.db import Contest, User, Task, Statement, Attachment, Team, Dataset, \
     Manager, Testcase
 from cms.grading.languagemanager import LANGUAGES, HEADER_EXTS
-from cmscommon.constants import SCORE_MODE_MAX, SCORE_MODE_MAX_TOKENED_LAST
+from cmscommon.constants import \
+    SCORE_MODE_MAX, SCORE_MODE_MAX_SUBTASK, SCORE_MODE_MAX_TOKENED_LAST
 from cmscommon.crypto import build_password
 from cmscommon.datetime import make_datetime
 from cmscontrib import touch
@@ -388,6 +389,8 @@ def get_task(self, get_statement=True):
 
         if conf.get("score_mode", None) == SCORE_MODE_MAX:
             args["score_mode"] = SCORE_MODE_MAX
+        elif conf.get("score_mode", None) == SCORE_MODE_MAX_SUBTASK:
+            args["score_mode"] = SCORE_MODE_MAX_SUBTASK
         elif conf.get("score_mode", None) == SCORE_MODE_MAX_TOKENED_LAST:
             args["score_mode"] = SCORE_MODE_MAX_TOKENED_LAST
 
diff --git a/cmstestsuite/unit_tests/grading/scoring_test.py b/cmstestsuite/unit_tests/grading/scoring_test.py
@@ -35,7 +35,8 @@
 from cmstestsuite.unit_tests.databasemixin import DatabaseMixin
 
 from cms.grading.scoring import task_score
-from cmscommon.constants import SCORE_MODE_MAX, SCORE_MODE_MAX_TOKENED_LAST
+from cmscommon.constants import \
+    SCORE_MODE_MAX, SCORE_MODE_MAX_SUBTASK, SCORE_MODE_MAX_TOKENED_LAST
 from cmscommon.datetime import make_datetime
 
 
@@ -56,18 +57,19 @@ def at(self, timestamp):
     def call(self):
         return task_score(self.participation, self.task)
 
-    def add_result(self, timestamp, score, tokened=False):
+    def add_result(self, timestamp, score, tokened=False, score_details=None):
+        score_details = score_details if score_details is not None else []
         submission = self.add_submission(
             participation=self.participation,
             task=self.task,
             timestamp=timestamp)
-        # task_score() only needs score, but all the fields must be set to
-        # declare the submission result as scored.
+        # task_score() only needs score and score_details, but all the fields
+        # must be set to declare the submission result as scored.
         self.add_submission_result(submission, self.task.active_dataset,
                                    score=score,
                                    public_score=score,
-                                   score_details={},
-                                   public_score_details={},
+                                   score_details=score_details,
+                                   public_score_details=score_details,
                                    ranking_score_details=[])
         if tokened:
             self.add_token(timestamp=timestamp, submission=submission)
@@ -148,6 +150,104 @@ def test_all_unscored(self):
         self.assertEqual(self.call(), (0.0, True))
 
 
+class TestTaskScoreMaxSubtask(TaskScoreMixin, unittest.TestCase):
+    """Tests for task_score() when the task uses SCORE_MODE_MAX_SUBTASK."""
+
+    def setUp(self):
+        super(TestTaskScoreMaxSubtask, self).setUp()
+        self.task.score_mode = SCORE_MODE_MAX_SUBTASK
+
+    @staticmethod
+    def subtask(idx, max_score, score_fraction):
+        """Return a score details item (a dict) describing one subtask."""
+        return {
+            "idx": idx,
+            "max_score": max_score,
+            "score_fraction": score_fraction
+        }
+
+    def test_no_submissions(self):
+        self.assertEqual(self.call(), (0.0, False))
+
+    def test_task_not_group(self):
+        self.add_result(self.at(1), 66.6, tokened=False)  # No score details.
+        self.add_result(self.at(2), 44.4, tokened=False)
+        self.session.flush()
+        self.assertEqual(self.call(), (66.6, False))
+
+    def test_all_submissions_scored(self):
+        self.add_result(self.at(1), 30 * 0.2 + 40 * 0.5 + 30 * 0.1,
+                        score_details=[
+                            self.subtask(3, 30, 0.2),
+                            self.subtask(2, 40, 0.5),
+                            self.subtask(1, 30, 0.1),
+                        ])
+        self.add_result(self.at(2), 30 * 0.1 + 40 * 0.5 + 30 * 0.2,
+                        score_details=[
+                            self.subtask(2, 40, 0.5),
+                            self.subtask(1, 30, 0.2),
+                            self.subtask(3, 30, 0.1),
+                        ])
+        self.session.flush()
+        self.assertEqual(self.call(), (30 * 0.2 + 40 * 0.5 + 30 * 0.2, False))
+
+    def test_compilation_error_total_is_zero(self):
+        # A compilation error is recorded with score 0.0 and details=[].
+        self.add_result(self.at(1), 0.0, score_details=[])
+        self.add_result(self.at(2), 30 * 0.0 + 40 * 0.0 + 30 * 0.0,
+                        score_details=[
+                            self.subtask(3, 30, 0.0),
+                            self.subtask(2, 40, 0.0),
+                            self.subtask(1, 30, 0.0),
+                        ])
+        self.session.flush()
+        self.assertEqual(self.call(), (30 * 0.0 + 40 * 0.0 + 30 * 0.0, False))
+
+    def test_compilation_error_total_is_positive(self):
+        # A compilation error is recorded with score 0.0 and details=[].
+        self.add_result(self.at(1), 0.0, score_details=[])
+        self.add_result(self.at(2), 30 * 0.1 + 40 * 0.0 + 30 * 0.0,
+                        score_details=[
+                            self.subtask(3, 30, 0.1),
+                            self.subtask(2, 40, 0.0),
+                            self.subtask(1, 30, 0.0),
+                        ])
+        self.session.flush()
+        self.assertEqual(self.call(), (30 * 0.1 + 40 * 0.0 + 30 * 0.0, False))
+
+    def test_partial(self):
+        self.add_result(self.at(1), 30 * 0.2 + 40 * 0.5 + 30 * 0.1,
+                        score_details=[
+                            self.subtask(3, 30, 0.2),
+                            self.subtask(2, 40, 0.5),
+                            self.subtask(1, 30, 0.1),
+                        ])
+        self.add_result(self.at(2), 30 * 0.1 + 40 * 0.5 + 30 * 0.2,
+                        score_details=[
+                            self.subtask(3, 30, 0.1),
+                            self.subtask(2, 40, 0.5),
+                            self.subtask(1, 30, 0.2),
+                        ])
+        self.add_result(self.at(3), None)  # Result without a score.
+        self.session.flush()
+        self.assertEqual(self.call(), (30 * 0.2 + 40 * 0.5 + 30 * 0.2, True))
+
+    def test_rounding(self):
+        # Scores must not be rounded, neither per subtask nor per task.
+        self.add_result(self.at(1), 80 + 0.0002,
+                        score_details=[
+                            self.subtask(1, 80, 1.0),
+                            self.subtask(2, 20, 0.00001),
+                        ])
+        self.add_result(self.at(2), 0.0004,
+                        score_details=[
+                            self.subtask(1, 80, 0.0),
+                            self.subtask(2, 20, 0.00002),
+                        ])
+        self.session.flush()
+        self.assertEqual(self.call(), (80 + 0.0004, False))
+
+
 class TestTaskScoreMax(TaskScoreMixin, unittest.TestCase):
     """Tests for task_score() using the max score mode."""
 
diff --git a/docs/External contest formats.rst b/docs/External contest formats.rst
@@ -139,7 +139,7 @@ The task YAML files require the following keys.
 
 - ``n_input`` (integer): number of test cases to be evaluated for this task; the actual test cases are retrieved from the :ref:`task directory <externalcontestformats_task-directory>`.
 
-- ``score_mode``: the score mode for the task, as in :ref:`configuringacontest_score`; it can be ``max_tokened_last`` (for the legacy behavior), or ``max`` (for the modern behavior).
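+- ``score_mode``: the score mode for the task, as in :ref:`configuringacontest_score`; it can be ``max_tokened_last`` (best among the tokened submissions and the last submission), ``max`` (best among all submissions), or ``max_subtask`` (sum, over the subtasks, of the best score for that subtask across all submissions).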
 
 - ``token_mode``: the token mode for the task, as in :ref:`configuringacontest_tokens`; it can be ``disabled``, ``infinite`` or ``finite``; if this is not specified, the loader will try to infer it from the remaining token parameters (in order to retain compatibility with the past), but you are not advised to relay on this behavior.