// Program.cs
using System;
using System.IO;
using System.Runtime.CompilerServices;
using System.Text.RegularExpressions;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Linq;
namespace TokenDiscovery {
class Program {
/// <summary>
/// Program entry point: trains the parser on SourceText.txt, parses the first paragraph,
/// and writes TokenChain.txt, TokenTree.json and Patterns.txt into the data directory.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args) {
    Console.WriteLine("Starting\n");

    // All input and output files live side by side in the data directory.
    string dataPath = DataDirectory();
    string sourceFile = dataPath + "SourceText.txt";
    string chainFile = dataPath + "TokenChain.txt";
    string treeFile = dataPath + "TokenTree.json";
    string patternsFile = dataPath + "Patterns.txt";

    var trainer = new Trainer();
    trainer.Initialize();

    // Load the training text, blanking everything from "# " to the end of the line (comments).
    string rawText = File.ReadAllText(sourceFile);
    string sourceText = Regex.Replace(rawText, "# .*", "");
    trainer.ImportSourceText(sourceText);

    // Hand-written experiments, currently disabled:
    // trainer.parser.RegisterExperiment("BasicWord", "<Letter! Letter+");
    // trainer.parser.RegisterExperiment("Number", "<(Digit | '.')! '-'? Digit+ ('.' Digit+)?");
    // trainer.parser.RegisterExperiment("Percent", "Number '%'");
    // trainer.parser.RegisterExperiment("ApostrophedWord", "BasicWord Apostrophe BasicWord");
    // trainer.parser.RegisterExperiment("Word", "ApostrophedWord | BasicWord | Number | Percent");
    // trainer.parser.RegisterExperiment("Dash", "Space '-'{2} Space | Space '-' Space | <'-'! '-'{2} | <'-'! '-'");
    // trainer.parser.RegisterExperiment("WordSeparator", "',' Space | ';' Space | ':' Space | Dash | Space");
    // trainer.parser.RegisterExperiment("Phrase", "<WordSeparator! (Word WordSeparator)* Word");
    // trainer.parser.RegisterExperiment("Sentence", "Phrase '.' Space?");
    // trainer.parser.RegisterExperiment("Paragraph", "<Sentence! Sentence+");

    trainer.Iterations = 4;
    trainer.Train();

    // Rename calls issued after training, in this order.
    trainer.parser.Rename("Letters", "<Letter! Letter+");
    trainer.parser.Rename("Words", "Letters (Space Letters)*");
    trainer.parser.Rename("Period", "'.' Space");

    // Pattern dump, currently disabled:
    // Console.WriteLine("#################### Patterns ####################");
    // foreach (var pattern in parser.Patterns.Values) {
    //     //if (pattern.Type < PatternType.Derived) continue;
    //     Console.WriteLine("- " + pattern.Identity + ": " + pattern.Describe());
    //     //Console.WriteLine("- " + pattern.Identity + ": " + pattern.Describe(false, true));
    // }
    // Console.WriteLine();
    // Console.ReadLine();

    Console.WriteLine("\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ Finally @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");

    // Only the first paragraph is parsed and reported on.
    var paragraph = trainer.Paragraphs[0];
    var chain = trainer.parser.Parse(paragraph);

    // TokenChain.txt: the paragraph, a blank line, then the chain's debug dump.
    var debugDump = chain.ToDebugString(PatternType.Derived, false);
    File.WriteAllText(chainFile, paragraph + "\n\n" + debugDump);

    bool nothingMatched = chain.Tops.Count == 0;
    if (nothingMatched) {
        Console.WriteLine("No patterns match full paragraph\n");
    }

    // TokenTree.json: the paragraph plus the first top-level match, or null when there is none.
    string sourceJson = JsonSerialize(paragraph);
    string rootJson;
    if (nothingMatched) {
        rootJson = "null";
    } else {
        rootJson = JsonSerialize(chain.Tops[0]);
    }
    File.WriteAllText(treeFile, $"{{\n \"SourceText\": {sourceJson},\n \"Root\":\n\n{rootJson}\n\n}}\n");

    trainer.parser.SavePatterns(patternsFile);

    Console.WriteLine("Done");
    Console.ReadLine();
}
/// <summary>
/// Builds the path of the "Data" directory used for this program's input and output files.
/// </summary>
/// <param name="path">
/// Filled in by the compiler through <see cref="CallerFilePathAttribute"/> with the path of
/// this source file at compile time. Only consulted in DEBUG builds; callers should omit it.
/// </param>
/// <returns>
/// In DEBUG builds, the "Data" folder next to this source file; otherwise the "Data" folder
/// under the process's current working directory. The result always ends with a directory
/// separator so that callers can append a file name directly.
/// </returns>
private static string DataDirectory([CallerFilePath] string path = null) {
#if DEBUG
    // We were given as input the path to this code file at compile time.
    // GetDirectoryName returns null for a null or root path; fall back to the working directory.
    path = Path.GetDirectoryName(path) ?? Environment.CurrentDirectory;
#else
    // Environment.CurrentDirectory is the working directory the process was started from,
    // which is not necessarily the folder that contains the executable.
    path = Environment.CurrentDirectory;
#endif
    // Let the framework choose the separator instead of guessing it from the path's contents,
    // and avoid doubled separators such as "//Data//".
    return Path.Combine(path, "Data") + Path.DirectorySeparatorChar;
}
/// <summary>
/// Serializer settings shared by every <see cref="JsonSerialize"/> call. Reusing one instance
/// lets System.Text.Json keep its cached type metadata instead of rebuilding it per call.
/// </summary>
private static readonly JsonSerializerOptions s_jsonOptions = new JsonSerializerOptions {
    WriteIndented = true,
    // Relaxed escaping keeps characters such as '<', '>' and '&' unescaped in the output.
    Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
};

/// <summary>
/// Serializes <paramref name="obj"/> to indented JSON using relaxed character escaping.
/// </summary>
/// <param name="obj">The value to serialize; null produces the JSON literal null.</param>
/// <returns>The JSON text.</returns>
private static string JsonSerialize(object obj) {
    return JsonSerializer.Serialize(obj, s_jsonOptions);
}
}
}