From b481511a214613bfc11073e29834dc856542bbe2 Mon Sep 17 00:00:00 2001 From: Louis-wr <85620187+Louis-wr@users.noreply.github.com> Date: Wed, 8 May 2024 08:04:39 +0000 Subject: [PATCH 1/9] fixed check etag and removed apache --- distros/dataverse.no/configs/http-ssl.conf | 2 ++ distros/dataverse.no/init.d/cronjob/checkETagByFiles.sh | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/distros/dataverse.no/configs/http-ssl.conf b/distros/dataverse.no/configs/http-ssl.conf index d1e0272..19a7ca9 100644 --- a/distros/dataverse.no/configs/http-ssl.conf +++ b/distros/dataverse.no/configs/http-ssl.conf @@ -585,3 +585,5 @@ SSLRandomSeed connect builtin SSLCryptoDevice builtin #SSLCryptoDevice ubsec +ServerTokens Prod +ServerSignature Off diff --git a/distros/dataverse.no/init.d/cronjob/checkETagByFiles.sh b/distros/dataverse.no/init.d/cronjob/checkETagByFiles.sh index 56a4e87..114b6c4 100755 --- a/distros/dataverse.no/init.d/cronjob/checkETagByFiles.sh +++ b/distros/dataverse.no/init.d/cronjob/checkETagByFiles.sh @@ -42,7 +42,7 @@ while true; do md5BlobBase64=$(curl -s "${BASEURL}${FILEPATH}${arrayData[0]}${KEYWINDOWSBLOB}" -I -q | grep "Content-MD5: " | awk '{ print $2 }' | base64 -di) if [ $? -eq 0 ]; then - md5Blob=$(echo "$md5BlobBase64" | xxd -p) + md5Blob=$(echo -n "$md5BlobBase64" | xxd -p) #if [ -z "${s3ETag}" ]; then if [ -z "${md5BlobBase64}" ]; then From 051981b664b3922548d74e6f8a958f861a944df3 Mon Sep 17 00:00:00 2001 From: Louis-wr <85620187+Louis-wr@users.noreply.github.com> Date: Wed, 8 May 2024 09:32:35 +0000 Subject: [PATCH 2/9] added -q and --no-install-recommends --- update.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/update.sh b/update.sh index 2a82c78..03aebfd 100755 --- a/update.sh +++ b/update.sh @@ -1,5 +1,5 @@ #! /bin/bash -apt-get update -y && apt-get dist-upgrade -y && apt-get autoremove -y && apt-get clean -y && apt-get autoclean -y +apt-get update -q && apt-get dist-upgrade -qqy --no-install-recommends && apt-get autoremove -qy && apt-get clean -qy && apt-get autoclean -qy [ -e /var/run/reboot-required ] && reboot # 0 04 * * 4 /distrib/dataverse-docker/update.sh From 88b4c22eda4f454feb44e9dcf3f1f4904c67b0cc Mon Sep 17 00:00:00 2001 From: Benedikt Meier Date: Wed, 8 May 2024 16:22:39 +0200 Subject: [PATCH 3/9] update read etag log file --- .../runOnce/readETAGLogFileAndCopy.sh | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh diff --git a/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh b/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh new file mode 100644 index 0000000..8775f73 --- /dev/null +++ b/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +cp -r /secrets/aws-cli/.aws ~ + +# AccessURL="[AZURE_BLOB_URL]" +OGINALBaseFolder="/dataCorrect/dataverse-files" +BaseFolder="/dataverse/dataverse-files" + +#BASEURL="https://....blob.core.windows.net/data1" +FILEPATH="/dataCorrect/dataverse-files/" +LogFile="./checkETAG_2024.log" + + +while true; do + + if [ -f "${LogFile}" ]; then + line=$(head -n 1 "${LogFile}") + + IFS=':' read -r -a arrayFerst <<< "$line" + + if [ "is not equal" == "${arrayFerst[0]}" ]; then + + IFS=" -- " read -r -a arraySecend <<< "${arrayFerst[1]}" + FileCopy="${arraySecend[0]}" + CheckMd5Database="${arraySecend[1]}" + + if [ ! -z ${OGINALBaseFolder}/"${FileCopy}" ]; then + md5BlobBase64=$(curl -s "${BASEURL}${FILEPATH}${FileCopy}${KEYWINDOWSBLOB}" -I -q | grep "Content-MD5: " | awk '{ print $2 }' | base64 -di) + + if [ $? -eq 0 ]; then + md5Blob=$(echo -n "$md5BlobBase64" | xxd -p) + if [ "${CheckMd5Database}" == "${md5Blob}" ]; then + + cp -fa ${OGINALBaseFolder}${FileCopy} ${BaseFolder}${FileCopy} + aws s3 cp ${OGINALBaseFolder}${FileCopy} s3://URL/ --recursive + + sed '1d' "${LogFile}" > "${LogFile}.tmp" + mv "${LogFile}.tmp" "${LogFile}" + fi + fi + fi + fi + + if [ ! -s "${LogFile}" ]; then + rm "${LogFile}" + exit 0 + fi + fi +done \ No newline at end of file From 4126b4601f096e2a1b398a1853cd68f83e1c2752 Mon Sep 17 00:00:00 2001 From: Benedikt Meier Date: Wed, 8 May 2024 16:32:12 +0200 Subject: [PATCH 4/9] change aws url --- distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh b/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh index 8775f73..676a4a9 100644 --- a/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh +++ b/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh @@ -10,6 +10,8 @@ BaseFolder="/dataverse/dataverse-files" FILEPATH="/dataCorrect/dataverse-files/" LogFile="./checkETAG_2024.log" +S3URLAWS="s3://URL/" + while true; do @@ -32,7 +34,7 @@ while true; do if [ "${CheckMd5Database}" == "${md5Blob}" ]; then cp -fa ${OGINALBaseFolder}${FileCopy} ${BaseFolder}${FileCopy} - aws s3 cp ${OGINALBaseFolder}${FileCopy} s3://URL/ --recursive + aws s3 cp ${OGINALBaseFolder}${FileCopy} ${S3URLAWS}${FileCopy} --recursive sed '1d' "${LogFile}" > "${LogFile}.tmp" mv "${LogFile}.tmp" "${LogFile}" From 00df889273c5d80994debf739de649ee76366185 Mon Sep 17 00:00:00 2001 From: Benedikt Meier Date: Tue, 14 May 2024 11:19:04 +0200 Subject: [PATCH 5/9] change " one --- .../runOnce/readETAGLogFileAndCopy.sh | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh b/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh index 676a4a9..7c8e903 100644 --- a/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh +++ b/distros/dataverse.no/runOnce/readETAGLogFileAndCopy.sh @@ -9,6 +9,7 @@ BaseFolder="/dataverse/dataverse-files" #BASEURL="https://....blob.core.windows.net/data1" FILEPATH="/dataCorrect/dataverse-files/" LogFile="./checkETAG_2024.log" +LogFile2="./checkETAG_not_copy.log" S3URLAWS="s3://URL/" @@ -26,7 +27,7 @@ while true; do FileCopy="${arraySecend[0]}" CheckMd5Database="${arraySecend[1]}" - if [ ! -z ${OGINALBaseFolder}/"${FileCopy}" ]; then + if [ ! -z "${OGINALBaseFolder}/${FileCopy}" ]; then md5BlobBase64=$(curl -s "${BASEURL}${FILEPATH}${FileCopy}${KEYWINDOWSBLOB}" -I -q | grep "Content-MD5: " | awk '{ print $2 }' | base64 -di) if [ $? -eq 0 ]; then @@ -36,13 +37,27 @@ while true; do cp -fa ${OGINALBaseFolder}${FileCopy} ${BaseFolder}${FileCopy} aws s3 cp ${OGINALBaseFolder}${FileCopy} ${S3URLAWS}${FileCopy} --recursive - sed '1d' "${LogFile}" > "${LogFile}.tmp" - mv "${LogFile}.tmp" "${LogFile}" + else + echo -n " orginal file these md5 -> " >> "${LogFile2}" + head -n 1 "${LogFile}" >> "${LogFile2}" + fi + else + echo -n " orginal blob error -> " >> "${LogFile2}" + head -n 1 "${LogFile}" >> "${LogFile2}" fi + else + echo -n " file not in orginal blob -> " >> "${LogFile2}" + head -n 1 "${LogFile}" >> "${LogFile2}" fi + else + echo -n " file not in blob -> " >> "${LogFile2}" + head -n 1 "${LogFile}" >> "${LogFile2}" fi + sed '1d' "${LogFile}" > "${LogFile}.tmp" + mv "${LogFile}.tmp" "${LogFile}" + if [ ! -s "${LogFile}" ]; then rm "${LogFile}" exit 0 From 281339a8d5c5684f4e34eca1ec06aa24859cb177 Mon Sep 17 00:00:00 2001 From: Louis-wr <85620187+Louis-wr@users.noreply.github.com> Date: Wed, 15 May 2024 10:35:51 +0000 Subject: [PATCH 6/9] apache update --- distros/dataverse.no/docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distros/dataverse.no/docker-compose.yaml b/distros/dataverse.no/docker-compose.yaml index e6c45e1..bf21548 100755 --- a/distros/dataverse.no/docker-compose.yaml +++ b/distros/dataverse.no/docker-compose.yaml @@ -65,7 +65,7 @@ services: networks: - traefik #image: test03/shibboleth:3.3.0.B - image: ${DOCKER_HUB}/shibboleth:3.4.1-2 + image: ${DOCKER_HUB}/shibboleth:3.4.1-3 container_name: shibboleth hostname: shibboleth privileged: true From 364001efe2d8a2e2c94041258579dd906b7df427 Mon Sep 17 00:00:00 2001 From: Louis-wr <85620187+Louis-wr@users.noreply.github.com> Date: Mon, 1 Jul 2024 12:18:02 +0000 Subject: [PATCH 7/9] removed automatic setings of curation labels --- distros/dataverse.no/init.d/024-curation-lables.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distros/dataverse.no/init.d/024-curation-lables.sh b/distros/dataverse.no/init.d/024-curation-lables.sh index f5f0703..a962ebf 100755 --- a/distros/dataverse.no/init.d/024-curation-lables.sh +++ b/distros/dataverse.no/init.d/024-curation-lables.sh @@ -1,3 +1,3 @@ #!/bin/bash -curl -X PUT -d '{"Standard Process":["Curator Assigned", "In Curation", "Awaiting Reply", "Legal or Ethical Concerns", "Awaiting Final Approval", "In Double Blind Review", "Awaiting Article Publication", "Candidate for Deletion"], "Alternate Process":["State 1","State 2","State 3"]}' http://localhost:8080/api/admin/settings/:AllowedCurationLabels +#curl -X PUT -d '{"Standard Process":["Curator Assigned", "In Curation", "Awaiting Reply", "Legal or Ethical Concerns", "Awaiting Final Approval", "In Double Blind Review", "Awaiting Article Publication", "Candidate for Deletion"], "Alternate Process":["State 1","State 2","State 3"]}' http://localhost:8080/api/admin/settings/:AllowedCurationLabels curl -X PUT -d 'STATUSUPDATED' http://localhost:8080/api/admin/settings/:AlwaysMuted From 2666975efd9ba4f5368cb49dd670fa97fa634e59 Mon Sep 17 00:00:00 2001 From: Louis-wr <85620187+Louis-wr@users.noreply.github.com> Date: Tue, 16 Jul 2024 13:03:25 +0000 Subject: [PATCH 8/9] updated shibboleth --- distros/dataverse.no/docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distros/dataverse.no/docker-compose.yaml b/distros/dataverse.no/docker-compose.yaml index bf21548..d9375fe 100755 --- a/distros/dataverse.no/docker-compose.yaml +++ b/distros/dataverse.no/docker-compose.yaml @@ -65,7 +65,7 @@ services: networks: - traefik #image: test03/shibboleth:3.3.0.B - image: ${DOCKER_HUB}/shibboleth:3.4.1-3 + image: ${DOCKER_HUB}/shibboleth:3.4.1-5 container_name: shibboleth hostname: shibboleth privileged: true From 10ee81a08b7d8628cfa2ce0e97ba3f672430f979 Mon Sep 17 00:00:00 2001 From: Louis-wr <85620187+Louis-wr@users.noreply.github.com> Date: Thu, 5 Sep 2024 13:52:38 +0200 Subject: [PATCH 9/9] changed robot.txt so s3 has less requests --- distros/dataverse.no/configs/robots.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/distros/dataverse.no/configs/robots.txt b/distros/dataverse.no/configs/robots.txt index 804a067..8ff4d02 100644 --- a/distros/dataverse.no/configs/robots.txt +++ b/distros/dataverse.no/configs/robots.txt @@ -14,8 +14,8 @@ Allow: /dataset.xhtml Allow: /dataverse/ Allow: /sitemap/ # The following lines are for the facebook, twitter and linkedin preview bots: -Allow: /api/datasets/:persistentId/thumbnail -Allow: /javax.faces.resource/images/ +Disallow: /api/datasets/:persistentId/thumbnail +Disallow: /javax.faces.resource/images/ # Comment out the following TWO lines if you DON'T MIND the bots crawling the search API links on dataverse pages: Disallow: /dataverse/*?q Disallow: /dataverse/*/search @@ -23,4 +23,4 @@ Disallow: / # Crawl-delay specification *may* be honored by *some* bots. # It is *definitely* ignored by Googlebot (they never promise to # recognize it either - it's never mentioned in their documentation) -Crawl-delay: 20 +Crawl-delay: 30