Source code for EduData.Task.KnowledgeTracing.format

# coding: utf-8
# create by tongshiwei on 2019-8-14

import io
import json

from tqdm import tqdm

__all__ = ["tl2json", "json2tl"]


[docs]def tl2json(src: str, tar: str, to_int=True, left_shift=False):
    """
    convert the dataset in `tl` sequence into `json` sequence

    .tl format
    The first line is the number of exercises a student attempted.
    The second line is the exercise tag sequence.
    The third line is the response sequence. ::

        15
        1,1,1,1,7,7,9,10,10,10,10,11,11,45,54
        0,1,1,1,1,1,0,0,1,1,1,1,1,0,0

    .json format
    Each sample contains several response elements, and each element is a two-element list.
    The first is the exercise tag and the second is the response. ::

        [[1,0],[1,1],[1,1],[1,1],[7,1],[7,1],[9,0],[10,0],[10,1],[10,1],[10,1],[11,1],[11,1],[45,0],[54,0]]

    """
    with open(src) as f, io.open(tar, "w", encoding="utf-8") as wf:
        for _ in tqdm(f):
            exercise_tags = f.readline().strip().strip(",").split(",")
            response_sequence = f.readline().strip().strip(",").split(",")
            if to_int:
                if not left_shift:
                    exercise_tags = list(map(int, exercise_tags))
                else:
                    exercise_tags = list(map(lambda x: int(x) - 1, exercise_tags))
                response_sequence = list(map(int, response_sequence))
            responses = list(zip(exercise_tags, response_sequence))
            print(json.dumps(responses), file=wf)


def json2tl(src, tar):
    with open(src) as f, io.open(tar, "w", encoding="utf-8") as wf:
        for line in tqdm(f):
            responses = json.loads(line)
            exercise_tags, response_sequence = zip(*responses)
            print(len(exercise_tags), file=wf)
            print(",".join(list(map(str, exercise_tags))), file=wf)
            print(",".join(list(map(str, response_sequence))), file=wf)


if __name__ == '__main__':
    json2tl("../data/junyi/student_log_kt.json.small.train", "../data/junyi/student_log_kt.json.small.train.tl")
    json2tl("../data/junyi/student_log_kt.json.small.test", "../data/junyi/student_log_kt.json.small.test.tl")