diff --git a/distros/dataverse.no/configs/http-ssl.conf b/distros/dataverse.no/configs/http-ssl.conf
index d1e0272..19a7ca9 100644
--- a/distros/dataverse.no/configs/http-ssl.conf
+++ b/distros/dataverse.no/configs/http-ssl.conf
@@ -585,3 +585,5 @@ SSLRandomSeed connect builtin
 
 SSLCryptoDevice builtin
 #SSLCryptoDevice ubsec
+ServerTokens Prod
+ServerSignature Off
diff --git a/distros/dataverse.no/configs/robots.txt b/distros/dataverse.no/configs/robots.txt
index 804a067..8ff4d02 100644
--- a/distros/dataverse.no/configs/robots.txt
+++ b/distros/dataverse.no/configs/robots.txt
@@ -14,8 +14,8 @@ Allow: /dataset.xhtml
 Allow: /dataverse/
 Allow: /sitemap/
 # The following lines are for the facebook, twitter and linkedin preview bots:
-Allow: /api/datasets/:persistentId/thumbnail
-Allow: /javax.faces.resource/images/
+Disallow: /api/datasets/:persistentId/thumbnail
+Disallow: /javax.faces.resource/images/
 # Comment out the following TWO lines if you DON'T MIND the bots crawling the search API links on dataverse pages:
 Disallow: /dataverse/*?q
 Disallow: /dataverse/*/search
@@ -23,4 +23,4 @@ Disallow: /
 # Crawl-delay specification *may* be honored by *some* bots.
 # It is *definitely* ignored by Googlebot (they never promise to
 # recognize it either - it's never mentioned in their documentation)
-Crawl-delay: 20
+Crawl-delay: 30
diff --git a/distros/dataverse.no/docker-compose.yaml b/distros/dataverse.no/docker-compose.yaml
index e6c45e1..d9375fe 100755
--- a/distros/dataverse.no/docker-compose.yaml
+++ b/distros/dataverse.no/docker-compose.yaml
@@ -65,7 +65,7 @@ services:
     networks:
      - traefik
     #image: test03/shibboleth:3.3.0.B
-    image: ${DOCKER_HUB}/shibboleth:3.4.1-2
+    image: ${DOCKER_HUB}/shibboleth:3.4.1-5
     container_name: shibboleth
     hostname: shibboleth
     privileged: true
diff --git a/distros/dataverse.no/init.d/024-curation-lables.sh b/distros/dataverse.no/init.d/024-curation-lables.sh
index f5f0703..a962ebf 100755
--- a/distros/dataverse.no/init.d/024-curation-lables.sh
+++ b/distros/dataverse.no/init.d/024-curation-lables.sh
@@ -1,3 +1,3 @@
 #!/bin/bash
-curl -X PUT -d '{"Standard Process":["Curator Assigned", "In Curation", "Awaiting Reply", "Legal or Ethical Concerns", "Awaiting Final Approval", "In Double Blind Review", "Awaiting Article Publication", "Candidate for Deletion"], "Alternate Process":["State 1","State 2","State 3"]}' http://localhost:8080/api/admin/settings/:AllowedCurationLabels
+#curl -X PUT -d '{"Standard Process":["Curator Assigned", "In Curation", "Awaiting Reply", "Legal or Ethical Concerns", "Awaiting Final Approval", "In Double Blind Review", "Awaiting Article Publication", "Candidate for Deletion"], "Alternate Process":["State 1","State 2","State 3"]}' http://localhost:8080/api/admin/settings/:AllowedCurationLabels
 curl -X PUT -d 'STATUSUPDATED' http://localhost:8080/api/admin/settings/:AlwaysMuted
diff --git a/distros/dataverse.no/init.d/cronjob/checkETagByFiles.sh b/distros/dataverse.no/init.d/cronjob/checkETagByFiles.sh
index 56a4e87..114b6c4 100755
--- a/distros/dataverse.no/init.d/cronjob/checkETagByFiles.sh
+++ b/distros/dataverse.no/init.d/cronjob/checkETagByFiles.sh
@@ -42,7 +42,7 @@ while true; do
 
         md5BlobBase64=$(curl -s "${BASEURL}${FILEPATH}${arrayData[0]}${KEYWINDOWSBLOB}" -I -q | grep "Content-MD5: " | awk '{ print $2 }' | base64 -di)
         if [ $? -eq 0 ]; then
-            md5Blob=$(echo "$md5BlobBase64" | xxd -p)
+            md5Blob=$(echo -n "$md5BlobBase64" | xxd -p)
 
             #if [ -z "${s3ETag}" ]; then
             if [ -z "${md5BlobBase64}" ]; then
diff --git a/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh b/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh
new file mode 100644
index 0000000..7c8e903
--- /dev/null
+++ b/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+cp -r /secrets/aws-cli/.aws ~
+
+# AccessURL="[AZURE_BLOB_URL]"
+OGINALBaseFolder="/dataCorrect/dataverse-files"
+BaseFolder="/dataverse/dataverse-files"
+
+#BASEURL="https://....blob.core.windows.net/data1"
+FILEPATH="/dataCorrect/dataverse-files/"
+LogFile="./checkETAG_2024.log"
+LogFile2="./checkETAG_not_copy.log"
+
+S3URLAWS="s3://URL/"
+
+
+while true; do
+
+    if [ -f "${LogFile}" ]; then
+        line=$(head -n 1 "${LogFile}")
+
+        IFS=':' read -r -a arrayFerst <<< "$line"
+
+        if [ "is not equal" == "${arrayFerst[0]}" ]; then
+
+            IFS=" -- " read -r -a arraySecend <<< "${arrayFerst[1]}"
+            FileCopy="${arraySecend[0]}"
+            CheckMd5Database="${arraySecend[1]}"
+
+            if [ ! -z "${OGINALBaseFolder}/${FileCopy}" ]; then
+                md5BlobBase64=$(curl -s "${BASEURL}${FILEPATH}${FileCopy}${KEYWINDOWSBLOB}" -I -q | grep "Content-MD5: " | awk '{ print $2 }' | base64 -di)
+
+                if [ $? -eq 0 ]; then
+                    md5Blob=$(echo -n "$md5BlobBase64" | xxd -p)
+                    if [ "${CheckMd5Database}" == "${md5Blob}" ]; then
+
+                        cp -fa ${OGINALBaseFolder}${FileCopy} ${BaseFolder}${FileCopy}
+                        aws s3 cp ${OGINALBaseFolder}${FileCopy} ${S3URLAWS}${FileCopy} --recursive
+
+                    else
+                        echo -n " orginal file these md5 -> " >> "${LogFile2}"
+                        head -n 1 "${LogFile}" >> "${LogFile2}"
+
+                    fi
+                else
+                    echo -n " orginal blob error -> " >> "${LogFile2}"
+                    head -n 1 "${LogFile}" >> "${LogFile2}"
+                fi
+            else
+                echo -n " file not in orginal blob -> " >> "${LogFile2}"
+                head -n 1 "${LogFile}" >> "${LogFile2}"
+            fi
+        else
+            echo -n " file not in blob -> " >> "${LogFile2}"
+            head -n 1 "${LogFile}" >> "${LogFile2}"
+        fi
+
+        sed '1d' "${LogFile}" > "${LogFile}.tmp"
+        mv "${LogFile}.tmp" "${LogFile}"
+
+        if [ ! -s "${LogFile}" ]; then
+            rm "${LogFile}"
+            exit 0
+        fi
+    fi
+done
\ No newline at end of file
diff --git a/update.sh b/update.sh
index 2a82c78..03aebfd 100755
--- a/update.sh
+++ b/update.sh
@@ -1,5 +1,5 @@
 #! /bin/bash
-apt-get update -y && apt-get dist-upgrade -y && apt-get autoremove -y && apt-get clean -y && apt-get autoclean -y
+apt-get update -q && apt-get dist-upgrade -qqy --no-install-recommends && apt-get autoremove -qy && apt-get clean -qy && apt-get autoclean -qy
 [ -e /var/run/reboot-required ] && reboot
 
 # 0 04 * * 4 /distrib/dataverse-docker/update.sh