-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGenovaV2.ps1
More file actions
executable file
·258 lines (195 loc) · 9.72 KB
/
GenovaV2.ps1
File metadata and controls
executable file
·258 lines (195 loc) · 9.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
<#This script was written by Peter Manca on 2/19/2016.
It was written for Genova to format the output from anydoc.
It takes all of the information on each level and places it on the same line as
the first image of each document.
Rough ex.
image1.tif|#|$|%|DE||D|D|C|D|D|D|
image2.tif|ED|D|R|D|$|D|F|G|H|F|F|
WILL BECOME
image1.tif|#|$|%|DE||D|D|C|D|D|D||ED|D|R|D|$|D|F|G|H|F|F|
image2.tif|
Images can come in out of order but will be contiguous within their docID.
#>
<#
REV1
There was an issue with how the extra pipes were cleaned up when a null (a "||")
came in at the end of a line. All of the updated code is tagged with #rev1.
The first thing I did was force a space between the pipes of every empty field
as each line is read:
$tempString = $tempString -replace "\|\|","| |" #rev1
Then, in two different spots below, I updated the old line of code to use a
more complex regex to clean up the pipes and then to revert the literal
"space saver". The first line below, with the # prepended, was the old code:
#$line1 = $line1 -replace "\|\|","|"
$line1 = $line1 -replace "\|{2,}$","|"#rev1
$line1 = $line1 -replace "\| \|", "||"#rev1
#>
cls
#Function to write out to output files
#  $logString - the text to append as one line
#  $logfile   - path of the file to append to (created by Add-Content if missing)
function Output-write([string]$logString, $logfile){
    #-LiteralPath keeps characters such as [ ] * ? in the file name from being
    #treated as wildcard patterns, which would make the write fail or miss
    Add-Content -LiteralPath $logfile -Value $logString
}
#Grabs all the files (wrapped in @() so zero or one match is still a collection)
$files = @(Get-ChildItem "C:\Test\genova\" -Filter "*.txt")

#Builds the text that one page contributes to the combined document line.
#  $fields      - the pipe-split columns of the page's input line, or $null when
#                 no line was received for that page number
#  $isFirstPage - $true for page 1, which keeps its image name (column 0)
#Columns 1 (docID) and 2 (page number) are never emitted. Every trailing pipe is
#removed and exactly one is put back, so a missing page contributes a lone "|".
function Format-GenovaPage($fields, [bool]$isFirstPage){
    $text = ""
    if($null -ne $fields){
        if($isFirstPage){
            $text += $fields[0] + '|'
        }
        for($k = 3; $k -lt $fields.Count; $k++){
            $text += $fields[$k] + '|'
        }
    }
    return ($text.TrimEnd('|') + '|')
}

#Writes one finished document to the output file: first the combined line for
#page 1, then one "imagename|" line for every later page that was received.
#  $pages   - hashtable of page number ([int]) -> columns of that page's line
#  $outfile - path of the output file (appended to through Output-write)
function Write-GenovaDocument($pages, $outfile){
    #walk from page 1 to the highest page number received; never below 1 so a
    #document without a page 1 still produces its (empty) first line
    $lastPage = 1
    foreach($pageKey in $pages.Keys){
        if($pageKey -gt $lastPage){
            $lastPage = $pageKey
        }
    }

    #both of these are local on purpose: they start empty for every document
    $line1 = ""
    $imageNames = New-Object System.Collections.ArrayList
    for($j = 1; $j -le $lastPage; $j++){
        $fields = $pages[$j]
        $line1 += Format-GenovaPage -fields $fields -isFirstPage ($j -eq 1)
        if($j -gt 1 -and $null -ne $fields){
            #[void] keeps the index returned by Add() out of the output stream
            [void]$imageNames.Add($fields[0])
        }
    }

    #cleans up the unneeded pipes and reverts the "space saver" placeholders
    $line1 = $line1 -replace "\|{2,}$","|"#rev1
    $line1 = $line1 -replace "\| \|", "||"#rev1

    #line1 is written first and then the image names, one line at a time
    Write-Host $line1
    Output-write -logfile $outfile -logString $line1
    foreach($imageName in $imageNames){
        Write-Host $imageName
        Output-write -logfile $outfile -logString ($imageName + '|')
    }
}

#to cycle through all of the files
foreach($file in $files){
    $name = $file.FullName #full path of the input file
    #Base name of the current file
    $baseName = $file.BaseName
    #output file (appended to, so an existing .out file keeps its old content)
    $outfile = "C:\test\genova\$baseName.out"

    #pages of the document currently being collected, keyed by page number
    $pages = @{}
    #docID of the document currently being collected; $null until the first line
    $currentDocID = $null
    $lineNumber = 0

    foreach($line in @(Get-Content -LiteralPath $name)){
        $lineNumber++
        #force a space into every empty field so it survives the pipe clean up
        $tempString = $line -replace "\|\|","| |" #rev1
        #Each line comes in as A|B|C|D|E|F| so the array holds @(A,B,C,D,E,F,"")
        $fields = $tempString.Split('|')

        #a usable line needs at least image name, docID and page number
        if($fields.Count -lt 3){
            #blank lines are skipped quietly; other short lines are reported
            if($line.Trim() -ne ""){
                Write-Warning "Skipping line $lineNumber of ${name}: fewer than 3 fields"
            }
            continue
        }

        [string]$DocID = $fields[1]
        try{
            [int]$pageNum = $fields[2]
        }catch{
            Write-Warning "Skipping line $lineNumber of ${name}: page number '$($fields[2])' is not a number"
            continue
        }

        #a different docID means the previous document is complete
        if($null -ne $currentDocID -and $DocID -ne $currentDocID){
            Write-GenovaDocument -pages $pages -outfile $outfile
            $pages = @{}
        }

        #images can arrive out of order; a repeated page number replaces the earlier one
        $pages[$pageNum] = $fields
        $currentDocID = $DocID
    }

    #end of file -> write the final document group
    #(an input with no usable lines still yields a single "|" line, as before)
    Write-GenovaDocument -pages $pages -outfile $outfile
}#end of foreach loop