-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample.php
More file actions
123 lines (92 loc) · 3.37 KB
/
example.php
File metadata and controls
123 lines (92 loc) · 3.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
<?php
/**
* Example usage file.
*
* @author Martin Porcheron <martin-uonpaperscraper@porcheron.uk>
* @license MIT
*/
require 'vendor/autoload.php';
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Default timezone used by PHP's date functions
\date_default_timezone_set('Europe/London');

// URL of the research group's eStaffProfile people directory
const URL_ESP = 'http://www.nottingham.ac.uk/research/groups/mixedrealitylab/people/index.aspx';

// Pause between publication scraping requests (presumably seconds -- confirm
// against Authors::publications()). With 0 you may crash the publications
// list appliance for the University website.
const CRAWL_SLEEP = 5;

// Heading placed at the top of every group page
const STR_TITLE = 'Publications';

// Text used in place of a DOI when a publication has none
const STR_NO_DOI = 'No DOI number is available';

// Year grouping: first year, last year, and how many years make up one group
const GRP_ST = 1990;
const GRP_END = 2020;
const GRP_INC = 5;

// Output path templates
//   PATH_YR:  one file per year   (%s = year)
//   PATH_GRP: one file per group  (first %s = last year, second %s = first year)
const PATH_YR = 'build/year/%s.html';
const PATH_GRP = 'build/group/%s-%s.html';
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Logging level: run the scraper with verbose logging
\Porcheron\UonPaperScraper\Log::setLevel(\Porcheron\UonPaperScraper\Log::LOG_VERBOSE);

// Point the scraper at the staff directory, then fetch the publications for
// all staff, handing CRAWL_SLEEP through as the delay setting
$authors = new \Porcheron\UonPaperScraper\Authors(URL_ESP);
$pubs = $authors->publications(true, CRAWL_SLEEP);

// Nothing came back: print a message and stop before any file is written
if (!$pubs) {
    exit('No publications');
}
// Collate publications by year: $pubsByYear maps a year to the list of
// rendered <li> elements for that year, in the order they were scraped
$pubsByYear = [];
foreach ($pubs as $pub) {
    // A publication without a year cannot be filed anywhere, so skip it
    $year = $pub->year();
    if (empty($year)) {
        continue;
    }

    // Fall back to the placeholder text when no DOI is available
    $doi = $pub->doi();
    if ($doi === null) {
        $doi = STR_NO_DOI;
    }

    if (!isset($pubsByYear[$year])) {
        $pubsByYear[$year] = [];
    }

    // Entries at even positions within a year (0, 2, 4, ...) get 'sys_alt'
    $cssClass = (\count($pubsByYear[$year]) % 2) === 0 ? 'sys_alt' : '';

    // The DOI is scraped text that ends up inside an HTML attribute, so it is
    // escaped here; the publication's own html() output is inserted as-is
    $title = \htmlspecialchars((string) $doi, ENT_QUOTES, 'UTF-8');

    $html = \sprintf('<li title="%s" class="%s">', $title, $cssClass);
    $html .= $pub->html();
    $html .= '</li>';

    $pubsByYear[$year][] = $html;
}
// Do not leave the loop variable behind in the global scope
unset($pub);
// Create separate files for each year: every file holds that year's
// publications wrapped in the listing markup
foreach ($pubsByYear as $year => $items) {
    $file = \sprintf(PATH_YR, $year);
    $dir = \dirname($file);

    $html = '<div id="lookup-publications" class="sys_profilePad ui-tabs-panel ui-widget-content ui-corner-bottom">';
    $html .= '<ul class="sys_publicationsListing">';
    $html .= \implode('', $items);
    $html .= '</ul></div>';

    // Create the directory only when it is missing, and without error
    // suppression, so a real failure (e.g. permissions) is reported by mkdir()
    // itself. The second is_dir() covers the directory appearing in between.
    if (!\is_dir($dir) && !\mkdir($dir, 0777, true) && !\is_dir($dir)) {
        continue;
    }

    \file_put_contents($file, $html);
}
// Create pages for groups for the website to reduce the total number of pages.
// Each group covers up to GRP_INC consecutive years, starting at GRP_ST. The
// final group is clipped to GRP_END, so GRP_END itself (and any trailing years
// that do not fill a whole group) still end up on a page.
$groupSize = \max(1, (int) GRP_INC); // guard against a zero/negative step looping forever
for ($firstYear = GRP_ST; $firstYear <= GRP_END; $firstYear += $groupSize) {
    $lastYear = \min($firstYear + $groupSize - 1, GRP_END);

    // Concatenate the per-year files, newest year first; years without a
    // file are skipped
    $html = '';
    for ($year = $lastYear; $year >= $firstYear; $year--) {
        $file = \sprintf(PATH_YR, $year);
        if (\is_file($file)) {
            $html .= '<h2 class="headingBackground">'. $year .'</h2>';
            $html .= \file_get_contents($file);
        }
    }

    // No publications in this range: do not write an empty page
    if ($html === '') {
        continue;
    }

    $html = \sprintf('<h1>%s</h1>%s', STR_TITLE, $html);

    // PATH_GRP takes the last year first, then the first year
    $file = \sprintf(PATH_GRP, $lastYear, $firstYear);
    $dir = \dirname($file);

    // Create the directory only when it is missing, and without error
    // suppression, so a real failure is reported by mkdir() itself
    if (!\is_dir($dir) && !\mkdir($dir, 0777, true) && !\is_dir($dir)) {
        continue;
    }

    \file_put_contents($file, $html);
}