-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMetaParser.php
More file actions
110 lines (102 loc) · 3.24 KB
/
MetaParser.php
File metadata and controls
110 lines (102 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
<?php
/**
* @MetaParser.php
* @author Gabriel John P. Gagno
* @company Stratpoint Technologies, Inc.
* Date: 11/27/15
* Time: 1:45 PM
*/
namespace Parser;
use DOMDocument;
use DOMXPath;
/**
* Class MetaParser
* Class used to parse meta tags from html strings or files
* @package Parser
*/
class MetaParser
{
    /** @var string Name of the HTML element whose attributes are extracted. */
    private static $tagName = 'meta';

    /**
     * Extracts name/content pairs from the <meta> tags of an HTML string.
     *
     * @param string     $string HTML markup to parse.
     * @param array|null $names  Optional list of meta names to keep. When it is
     *                           null or empty, every named meta tag is returned.
     * @return array Map of meta "name" attribute => "content" attribute.
     */
    public static function parseMetaTagsFromHtmlString($string, $names = NULL)
    {
        return self::parse($string, $names);
    }

    /**
     * Shared parsing routine behind the public wrapper(s).
     *
     * Meta tags that carry no "name" attribute (for example charset or
     * property-based tags) are skipped; otherwise they would all be stored
     * under the same empty-string key. When a name occurs more than once in
     * the document, the content of the last occurrence is kept.
     *
     * @param string     $string HTML markup to parse.
     * @param array|null $names  Optional list of meta names to keep; result keys
     *                           follow the order of this list.
     * @return array Map of meta name => content; empty when the input is empty
     *               or could not be loaded.
     */
    private static function parse($string, $names = NULL)
    {
        // DOMDocument::loadHTML() rejects empty input (a warning before PHP 8,
        // a ValueError from PHP 8 on), so answer that case directly.
        if ($string === null || $string === '') {
            return array();
        }

        // Buffer libxml parse errors only for the duration of this call, then
        // drop them and restore whatever setting the caller had.
        $previousSetting = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $loaded = $dom->loadHTML($string);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        if ($loaded === false) {
            return array();
        }

        // Single pass over the document: collect every named meta tag.
        $found = array();
        foreach ($dom->getElementsByTagName(self::$tagName) as $meta) {
            $nameKey = $meta->getAttribute('name');
            if ($nameKey === '') {
                continue;
            }
            $found[$nameKey] = $meta->getAttribute('content');
        }

        // No filter requested (null, empty array or any other empty value).
        if (empty($names)) {
            return $found;
        }

        $metaArray = array();
        foreach ($names as $name) {
            // Only string names can equal an attribute value.
            if (is_string($name) && array_key_exists($name, $found)) {
                $metaArray[$name] = $found[$name];
            }
        }
        return $metaArray;
    }

    /**
     * Copies search strings from a fixed list of CSV files into the
     * aq_md_searchstring column of the "webpage" table of the local "nutch"
     * MySQL database.
     *
     * For each file the first row is discarded. For every following row,
     * column 1 is read as a URL and column 0 as the search string. The URL's
     * leading http:// or https:// is removed and the first webpage row whose
     * baseUrl contains the remainder receives the search string. Rows with
     * fewer than two columns, or whose URL has no such scheme, are skipped.
     * Progress is echoed to standard output.
     *
     * The script terminates via die() when the connection or statement
     * preparation fails.
     *
     * @return void
     */
    public static function addSearchString() {
        $sampArray = array(
            "business.csv",
            "shop.csv",
            "community.csv",
            "tattoo.csv",
            "www.csv"
        );

        $conn = mysqli_connect('localhost', 'root', '', 'nutch');
        if (!$conn) {
            // There is no link to ask for the error when connecting failed.
            die('Not connected : ' . mysqli_connect_error());
        }
        $conn->autocommit(true);

        // Bound parameters keep CSV content out of the SQL text.
        $select = $conn->prepare('select id from webpage where baseUrl like ?');
        $update = $conn->prepare('update webpage set aq_md_searchstring = ? where id = ?');
        if (!$select || !$update) {
            $message = 'Could not prepare statements : ' . $conn->error;
            $conn->close();
            die($message);
        }

        foreach ($sampArray as $sArray) {
            $file = fopen($sArray, 'r');
            if ($file === false) {
                // fopen() has already raised a warning; move on to the next file.
                continue;
            }

            // The first row is discarded.
            fgetcsv($file);

            // fgetcsv() returns false at end of file, which ends the loop
            // without ever indexing into a non-array.
            while (($data = fgetcsv($file)) !== false) {
                if (!isset($data[0], $data[1])) {
                    continue;
                }

                $actual = self::stripScheme($data[1]);
                if ($actual === null) {
                    // Without a usable URL the LIKE pattern would be "%%" and
                    // match every row in the table.
                    continue;
                }
                echo $actual;

                // "%" and "_" inside the URL still act as LIKE wildcards.
                $pattern = '%' . $actual . '%';
                $select->bind_param('s', $pattern);
                if (!$select->execute()) {
                    continue;
                }
                // Buffer the result so the connection is free for the update.
                $select->store_result();
                $select->bind_result($rowId);
                $hasRow = $select->fetch();
                $select->free_result();

                // Only the first matching row is updated.
                if ($hasRow !== true) {
                    continue;
                }

                $searchString = $data[0];
                echo "ROW ID: ".$rowId."\n";
                echo "DATA: ".$searchString."\n\n";

                // Exact match on the id that was just selected.
                $update->bind_param('ss', $searchString, $rowId);
                $update->execute();
            }
            fclose($file);
        }

        $select->close();
        $update->close();
        $conn->close();
    }

    /**
     * Removes a leading "http://" or "https://" from a URL.
     *
     * @param string $url URL as read from the CSV file.
     * @return string|null The URL without its scheme, or null when the URL does
     *                     not start with either scheme or nothing remains.
     */
    private static function stripScheme($url)
    {
        foreach (array('http://', 'https://') as $scheme) {
            $length = strlen($scheme);
            if (strncmp($url, $scheme, $length) === 0) {
                $rest = (string) substr($url, $length);
                return $rest === '' ? null : $rest;
            }
        }
        return null;
    }
}