From 900f3686a845fcf693cb451d81b720ece58121b6 Mon Sep 17 00:00:00 2001 From: shawticus Date: Wed, 26 Oct 2022 21:52:43 -0700 Subject: [PATCH 1/2] add local compile --- datasets/dataset-specs.js | 40 ++++++++++++++++++------ scripts/ai/compile_branch.sh | 28 +++++++++++++++++ scripts/ai/format-training-data-local.js | 18 +++++++++++ scripts/ai/format-training-data.js | 3 +- 4 files changed, 78 insertions(+), 11 deletions(-) create mode 100644 scripts/ai/compile_branch.sh create mode 100644 scripts/ai/format-training-data-local.js diff --git a/datasets/dataset-specs.js b/datasets/dataset-specs.js index e3e1e04..5317eb8 100644 --- a/datasets/dataset-specs.js +++ b/datasets/dataset-specs.js @@ -7,10 +7,26 @@ import { // getItemAttributeKeys, } from './dataset-parser.js'; +let fs; + // -const fetchText = async u => { - const res = await fetch(u); +const fetchText = async (u, local) => { + // dynamically load the fs module if we are processing local files in node + if (!fs && local) { + fs = await import('fs'); + } + let res; + + if (local) { + const data = fs.readFileSync(u); + res = data.toString(); + console.log('res is', res) + return res; + } else { + res = await fetch(u); + } + if (res.ok) { const text = await res.text(); return text; @@ -60,17 +76,23 @@ const mdSpecs = [ // descriptionKey: 'Candidate assets', }, ]; -const datasetSpecUrls = mdSpecs.map(mdSpec => `${datasetSpecsBasePath}${mdSpec.url}`); -const datasetDataUrls = mdSpecs.map(mdSpec => `${datasetDataBasePath}${mdSpec.url}`); +let datasetSpecUrls = mdSpecs.map(mdSpec => `${datasetSpecsBasePath}${mdSpec.url}`); +let datasetDataUrls = mdSpecs.map(mdSpec => `${datasetDataBasePath}${mdSpec.url}`); // let datasetSpecPromise = null; -export const getDatasetSpecs = () => { +export const getDatasetSpecs = (localPathOverride) => { + + if(localPathOverride){ + datasetSpecUrls = mdSpecs.map(mdSpec => `${localPathOverride}/specs/${mdSpec.url}`); + datasetDataUrls = mdSpecs.map(mdSpec => `${localPathOverride}/data/${mdSpec.url}`); + } + if (!datasetSpecPromise) { datasetSpecPromise = (async () => { const datasetSpecs = await Promise.all(datasetSpecUrls.map(async datasetSpecUrl => { - const mdText = await fetchText(datasetSpecUrl); + const mdText = await fetchText(datasetSpecUrl, !!localPathOverride); const datasetSpec = parseDatasetSpec(mdText); return datasetSpec; })); @@ -80,10 +102,10 @@ export const getDatasetSpecs = () => { return datasetSpecPromise; }; -export const getTrainingItems = async () => { - const datasetSpecs = await getDatasetSpecs(); +export const getTrainingItems = async (localPathOverride) => { + const datasetSpecs = await getDatasetSpecs(localPathOverride); const itemsArray = await Promise.all(datasetDataUrls.map(async (datasetDataUrl, index) => { - const mdText = await fetchText(datasetDataUrl); + const mdText = await fetchText(datasetDataUrl, !!localPathOverride); const datasetSpec = datasetSpecs[index]; let items = parseDatasetItems(mdText, datasetSpec); items = items.map(item => formatTrainingItemCandidates(item, datasetSpec)).flat(); diff --git a/scripts/ai/compile_branch.sh b/scripts/ai/compile_branch.sh new file mode 100644 index 0000000..8891de1 --- /dev/null +++ b/scripts/ai/compile_branch.sh @@ -0,0 +1,28 @@ +# check if lore folder exists + +echo "Compiling branch $1..." +echo "Checking if lore folder exists... you will need access to webaverse/lore for this to work" + +# if it doesn't, clone it +# if it does, reset and pull it +if [ ! -d "lore" ]; then + git clone https://github.com/webaverse/lore + cd lore +else + cd lore + git reset --hard + git pull +fi + +echo "Synced lore folder" + +# get the first arg passed to this script +# this is the branch name +branch=$1 + +# checkout branch +git checkout $branch + +cd .. + +node format-training-data-local.js ./lore/datasets \ No newline at end of file diff --git a/scripts/ai/format-training-data-local.js b/scripts/ai/format-training-data-local.js new file mode 100644 index 0000000..3057da1 --- /dev/null +++ b/scripts/ai/format-training-data-local.js @@ -0,0 +1,18 @@ +import { getTrainingItems } from '../../datasets/dataset-specs.js'; + +// get the base path from the command line args + +const basePath = process.argv[2] ?? "./"; + +console.log('basepath is') +console.log(basePath) +const _run = async (req, res) => { + const items = await getTrainingItems(basePath); + console.log('items are') + console.log(items) + process.stdout.write( + items.map(item => JSON.stringify(item)) + .join('\n') + ); +}; +_run(); \ No newline at end of file diff --git a/scripts/ai/format-training-data.js b/scripts/ai/format-training-data.js index 19fce14..a68c0d6 100644 --- a/scripts/ai/format-training-data.js +++ b/scripts/ai/format-training-data.js @@ -1,5 +1,4 @@ -import {capitalizeAllWords, isAllCaps} from '../../utils.js'; -import {getTrainingItems} from '../../datasets/dataset-specs.js'; +import { getTrainingItems } from '../../datasets/dataset-specs.js'; // From b8212217e7d166cc89bb8b921d60b2b83022a786 Mon Sep 17 00:00:00 2001 From: shawticus Date: Wed, 26 Oct 2022 21:53:45 -0700 Subject: [PATCH 2/2] Compiled branch --- scripts/ai/compile_branch.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/ai/compile_branch.sh b/scripts/ai/compile_branch.sh index 8891de1..af668ae 100644 --- a/scripts/ai/compile_branch.sh +++ b/scripts/ai/compile_branch.sh @@ -1,5 +1,7 @@ # check if lore folder exists +echo "Local compile script -- pass in the branch name from the lore repo as the first argument" + echo "Compiling branch $1..." echo "Checking if lore folder exists... you will need access to webaverse/lore for this to work" @@ -25,4 +27,6 @@ git checkout $branch cd .. -node format-training-data-local.js ./lore/datasets \ No newline at end of file +node format-training-data-local.js ./lore/datasets + +echo "Done!" \ No newline at end of file