// Mirror of https://github.com/unanmed/ginka-generator.git
// Synced 2026-05-15 05:11:10 +08:00 (32 lines, 1.1 KiB, TypeScript)
import { readFile, writeFile } from 'fs-extra';
|
|
import { GinkaDataset } from './types';
|
|
import { chooseFrom } from './utils';
|
|
|
|
// Positional command-line arguments, in order: path of the train output file,
// path of the eval output file, path of the input dataset, and the fraction
// of entries that should go into the eval set.
const [outputTrain, outputEval, input, ratioStr] = process.argv.slice(2);
const ratio = parseFloat(ratioStr);

/**
 * Builds a new dataset that holds only the given keys of `source`, tagged
 * with a freshly generated random `datasetId`.
 *
 * @param source Dataset whose entries are copied (by reference).
 * @param keys Keys of `source.data` to include.
 * @returns The new dataset containing just the selected entries.
 */
function pickEntries(
    source: GinkaDataset,
    keys: readonly string[]
): GinkaDataset {
    const subset: GinkaDataset = {
        datasetId: Math.floor(Math.random() * 1e12),
        data: {}
    };
    for (const key of keys) {
        subset.data[key] = source.data[key];
    }
    return subset;
}

/**
 * Reads the input dataset, moves `floor(count * ratio)` of its entries
 * (selected by `chooseFrom`) into an eval dataset, puts the remaining
 * entries into a train dataset, and writes both as JSON.
 *
 * @throws Error when an argument is missing, the ratio is not a number in
 * [0, 1], or the input file has no `data` object.
 */
async function main(): Promise<void> {
    if (!outputTrain || !outputEval || !input || ratioStr === undefined) {
        throw new Error('Usage: <outputTrain> <outputEval> <input> <ratio>');
    }
    // parseFloat yields NaN for non-numeric text; reject it here instead of
    // letting it flow into the element count passed to chooseFrom.
    if (!Number.isFinite(ratio) || ratio < 0 || ratio > 1) {
        throw new Error(
            `Invalid ratio "${ratioStr}": expected a number between 0 and 1.`
        );
    }

    const raw = await readFile(input, 'utf-8');
    const dataJSON = JSON.parse(raw) as GinkaDataset;
    // Minimal shape check only: the entries themselves are copied through
    // untouched, so their contents are not inspected.
    if (typeof dataJSON?.data !== 'object' || dataJSON.data === null) {
        throw new Error(
            `Input file "${input}" does not contain a "data" object.`
        );
    }

    const keys = Object.keys(dataJSON.data);
    // NOTE(review): chooseFrom presumably returns that many distinct keys
    // picked at random — confirm against ./utils.
    const toEval = chooseFrom(keys, Math.floor(keys.length * ratio));

    // Everything not chosen for eval goes to train, in original key order.
    // A plain filter is used so the script does not depend on
    // Set.prototype.difference being available in the runtime.
    const evalKeys = new Set(toEval);
    const toTrain = keys.filter(key => !evalKeys.has(key));

    const trainData = pickEntries(dataJSON, toTrain);
    const evalData = pickEntries(dataJSON, toEval);

    await writeFile(outputTrain, JSON.stringify(trainData), 'utf-8');
    await writeFile(outputEval, JSON.stringify(evalData), 'utf-8');
}

// Report failures explicitly and signal them through the exit code.
main().catch((error: unknown) => {
    console.error(error instanceof Error ? error.message : error);
    process.exitCode = 1;
});
|