-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathScanner.php
More file actions
128 lines (114 loc) · 3.68 KB
/
Scanner.php
File metadata and controls
128 lines (114 loc) · 3.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
<?php
/**
 * Scans a subreddit's stories for image links posted in their comments.
 *
 * For every story the scanner keeps a small cache entry (comment count, time
 * of the last refresh and the list of image thumbnails found). A story's
 * comment feed is only re-fetched when the story is new, when its comment
 * count has changed, or when the cached entry is older than $forceRefresh
 * seconds.
 *
 * The cache is read from and written to $this->state, which is inherited from
 * StateKeeper (not visible in this file; per the original author's comments it
 * is expected to persist the state to disk).
 */
class Scanner extends StateKeeper {
    /** Reddit client; must provide getStories() and getComments($id). */
    public $reddit;

    /** Thumbnail provider; must provide get($url). */
    public $thumbnailer;

    /** Maximum age (in seconds) of a cached comment feed before it is re-fetched. */
    public $forceRefresh;

    /**
     * @param object $reddit       client used to fetch stories and comments
     * @param object $tb           thumbnailer used to turn an image URL into a thumbnail
     * @param int    $forceRefresh seconds after which an unchanged story is refreshed anyway
     */
    public function __construct($reddit, $tb, $forceRefresh = 300) {
        parent::__construct(array());
        $this->reddit = $reddit;
        $this->thumbnailer = $tb;
        $this->forceRefresh = $forceRefresh;
    }

    /**
     * Fetches the current stories and returns the image thumbnails found in
     * their comments, refreshing only the stories that need it.
     *
     * Side effects: prints one progress line per refreshed story and replaces
     * $this->state with the new cache (stories no longer in the feed are
     * dropped from it).
     *
     * @return array story id => (comment id => thumbnail entry), sorted by
     *               descending score; stories without any thumbnail are omitted
     * @throws Exception when the stories or a comment feed is not an array
     */
    public function scan() {
        // 1. get the subreddit main stories
        $stories = $this->reddit->getStories();
        if (!is_array($stories)) {
            throw new Exception("stories is not an array. aborting");
        }

        // 2. get the cached version
        $cachedStories = $this->state;

        // 3. this is going to be our next cached state
        $newCachedStories = array();

        // 4. loop through the new feed
        $out = array();
        foreach ($stories as $story) {
            // only "t3" entries are processed as stories
            if ($story->kind != "t3") continue;

            $data = $story->data;
            $id = $data->id;

            // Look the cache entry up afresh on every iteration so that the
            // entry of a previously visited story can never leak into this one.
            $cs = isset($cachedStories[$id]) ? $cachedStories[$id] : null;

            if ($cs === null) {
                // new story? cool.
                print "Refreshing $id because it's a new story.\n";
                $refresh = true;
            } elseif ($cs["num_comments"] != $data->num_comments) {
                // comment count changed. refresh to find out what changed
                print "Refreshing $id because num_comment has changed\n";
                $refresh = true;
            } elseif (time() - $cs["refreshed"] > $this->forceRefresh) {
                // nothing changed, but it's been a while. refresh anyway.
                print "Refreshing $id because it's been a while.\n";
                $refresh = true;
            } else {
                $refresh = false;
            }

            if ($refresh) {
                // timestamp is taken before the (possibly slow) fetch
                $refreshedAt = time();
                $thumbs = array();
                $this->scanComments($thumbs, $this->reddit->getComments($id));
                // sort comments so upvoted comments have priority for thumbnails
                uasort($thumbs, array($this, 'commentCmp'));
            } else {
                // reuse what we already know about this story
                $refreshedAt = $cs["refreshed"];
                $thumbs = isset($cs["feed"]) ? $cs["feed"] : array();
            }

            // update cache
            $newCachedStories[$id] = array(
                "num_comments" => $data->num_comments,
                "refreshed" => $refreshedAt,
                "feed" => $thumbs,
            );

            if (count($thumbs) > 0) {
                $out[$id] = $thumbs;
            }
        }

        // hand the new cache to StateKeeper
        $this->state = $newCachedStories;

        // return something useful..
        return $out;
    }

    /**
     * Comparison callback ordering thumbnail entries by descending score.
     *
     * @param array $a thumbnail entry with a "score" key
     * @param array $b thumbnail entry with a "score" key
     * @return int 0 when the scores are equal, 1 when $a scores lower than $b, -1 otherwise
     */
    public function commentCmp($a, $b) {
        $a = $a["score"];
        $b = $b["score"];
        // Written as an explicit guard instead of a nested ternary: the
        // unparenthesized form is rejected by PHP 8 and mis-evaluated equal
        // scores on earlier versions.
        if ($a == $b) {
            return 0;
        }
        return ($a < $b) ? 1 : -1;
    }

    /**
     * Scans every comment of a list (and, through scanComment(), its replies).
     *
     * @param array $thumbs   collected thumbnail entries, keyed by comment id (modified in place)
     * @param array $comments comment objects, each exposing a ->data member
     * @throws Exception when $comments is not an array
     */
    protected function scanComments(&$thumbs, $comments) {
        if (!is_array($comments)) {
            throw new Exception("comments is not an array. aborting");
        }
        foreach ($comments as $comment) {
            $this->scanComment($thumbs, $comment->data);
        }
    }

    /**
     * Records the first image URL found in a comment body, then recurses into
     * the comment's replies.
     *
     * @param array  $thumbnails collected thumbnail entries, keyed by comment id (modified in place)
     * @param object $data       comment data (body, id, link_id, ups, downs, replies)
     */
    protected function scanComment(&$thumbnails, $data) {
        // Nodes without a body are treated as having no text rather than
        // triggering an undefined-property warning.
        $body = isset($data->body) ? $data->body : "";

        // extract the first image URL with some silly regexp..
        // NOTE(review): only http:// links with a lowercase png/gif/jpg/jpeg
        // extension match; https:// links are ignored. Confirm the thumbnailer
        // handles https before widening the pattern.
        if (preg_match("{(http://[^)\] \r\t\n]+\.(png|gif|jpg|jpeg)(\?[^)\] \r\t\n]*)?)}", $body, $match)) {
            $cid = $data->id;
            $url = $match[1];
            $thumbnails[$cid] = array(
                "id" => $cid,
                "url" => $url,
                "thumbnail" => $this->thumbnailer->get($url),
                // drops the first three characters of link_id (presumably the "t3_" type prefix)
                "sid" => substr($data->link_id, 3),
                "score" => ((int) $data->ups) - ((int) $data->downs),
            );
        }

        // check for children
        if (!empty($data->replies)) {
            $this->scanComments($thumbnails, $data->replies->data->children);
        }
    }
}