aboutsummaryrefslogtreecommitdiff
blob: 06b9723637d4704bbbec2d7c48ae06126824b66d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
#!/bin/bash
# Copyright 2011-2019 Gentoo Authors; Distributed under the GPL v2
# might be earlier copyright, no history available

# 1) Create the tarball
# 2) Sanity check the tarball size and bail out if it appears abnormal.
# 3) Compress tarball(bzip2,xz)
# 4) sign
# 5) delta generation
# 6) create some symlinks
# 7) tidy up
# 8) clean up old deltas

# Files created by this run must be world-readable (mirrors serve them).
umask 022
# Site configuration; override the location by exporting RSYNC_VARS.
# NOTE(review): BASE, UPLOAD, FINALDIR_repo_gentoo, SIGNKEYID_snapshot and the
# write_time_log helper are used below but not defined in this file, so they
# presumably come from this vars file -- confirm.
RSYNC_VARS=${RSYNC_VARS:=/usr/local/bin/mastermirror/rsync-gen.vars}
source "${RSYNC_VARS}" || exit 3

# locations (if used in multiple files, please move to rsync-gen.vars file)
MASTER="${FINALDIR_repo_gentoo}" # the master gentoo-x86 copy
TEMP="${BASE}/snapshots-tmp/" # working directory
HISTORY="7" # number of days of previous snapshots to keep
DELTA_UPLOAD="${UPLOAD}/deltas/"

# Create any missing directory. Paths are quoted so unusual characters cannot
# split them, and a failed mkdir aborts the run instead of letting later steps
# work in a directory that does not exist.
for required_dir in "${TEMP}" "${UPLOAD}" "${DELTA_UPLOAD}"; do
	[[ -d ${required_dir} ]] && continue
	mkdir "${required_dir}" || exit 1
done
unset required_dir

write_time_log "---------------------------------------------------------"
write_time_log "START ENTIRE SCRIPT	$(date -u)"

# Date stamps (YYYYMMDD, as reported by /bin/date without -u) that the
# artefact names below are built from.
YESTERDAY="$(/bin/date -d yesterday +%Y%m%d)"
DELTA_BASE="$(/bin/date -d '-2 day' +%Y%m%d)"

# Old-style snapshot for yesterday, and the two-day-old snapshot that is the
# preferred base for delta generation.
FILENAME=portage-${YESTERDAY}.tar.bz2
DELTA_FILENAME=portage-${DELTA_BASE}.tar.bz2

# New-style snapshot for yesterday.
FILENAME_NEW=gentoo-${YESTERDAY}.tar.xz

# Compression is CPU-hungry; every compressor call is prefixed with this so it
# runs at reduced priority.
NICE='nice -n 10'

# Key used for the detached signatures in step 4.
SIGNKEYID="${SIGNKEYID_snapshot}"

# Refuse to start unless gpg's key listing mentions the signing key: finding
# that out only after the tarballs are built would waste the whole run.
gpg --list-keys "${SIGNKEYID}" | grep -sq "${SIGNKEYID}" || {
	echo "${SIGNKEYID} not imported! exiting"
	exit 1
}

# Pick the snapshot that the size check (step 2) and the delta (step 5) are
# measured against: the dated snapshot named by DELTA_FILENAME if present,
# otherwise the 'latest' snapshot. If both are missing, bail out.
# The trailing FAIL entry is a sentinel: the loop only reaches it when no
# real candidate exists.
# Elements and the expansion are quoted so a path containing whitespace or
# glob characters stays a single candidate.
POTENTIAL_SNAPSHOTS=(
	"${UPLOAD}/${DELTA_FILENAME}"
	"${UPLOAD}/portage-latest.tar.bz2"
	FAIL
)

for previous_snapshot in "${POTENTIAL_SNAPSHOTS[@]}"; do
	[[ -e ${previous_snapshot} ]] && break
done

if [[ ${previous_snapshot} == FAIL ]]; then
	# Cut FAIL out of the printed list, to ease debugging.
	echo "Previous snapshots do not exist: ${POTENTIAL_SNAPSHOTS[*]:0:2}"
	exit 1
fi

# Select the bzip2 implementation.
# lbzip2 and pbzip2 (which default to multiple threads) are not candidates:
# emerge-delta-webrsync relies on verifying the signature of the tarball after
# re-compressing it with bzip2, and therefore fails if the signature was made
# on an lbzip2 or pbzip2 compressed tarball, #573908
BZIP2_PROG=FAIL
if type bzip2 >/dev/null 2>&1; then
	BZIP2_PROG=bzip2
fi
if [ "${BZIP2_PROG}" == FAIL ]; then
	echo "Could not find any BZIP2" 1>&2
	exit 1
fi

# Select the xz implementation.
# pixz is not a candidate: it appends some data and leads to SIGPIPE, #573642
XZ_PROG=FAIL
if type xz >/dev/null 2>&1; then
	XZ_PROG=xz
fi
if [ "${XZ_PROG}" == FAIL ]; then
	echo "Could not find any xz" 1>&2
	exit 1
fi

# Newer 'xz' supports threads as well, but defaults to single-threaded.
# When the help text advertises the option, append "-T <n>" to XZ_PROG; later
# uses expand XZ_PROG unquoted so the extra words become arguments.
if ${XZ_PROG} --help | grep -sq threads=NUM; then
	# Cap at 6 threads. With 12 threads xz itself reported:
	# "Reduced the number of threads from 12 to 6 to not exceed the memory
	# usage limit of 8,040 MiB"
	nproc=$(nproc)
	if [[ ${nproc} -gt 6 ]]; then
		nproc=6
	fi
	XZ_PROG="${XZ_PROG} -T ${nproc}"
fi

# working dir
# Every tarball, checksum and signature below is created through a relative
# name, so continuing after a failed cd would scatter them in whatever
# directory the script was started from. Abort instead.
cd "${TEMP}" || exit 1

# Tarball generation
write_time_log "START TARBALL		$(date -u)"

# 1a) Tarball prep:
write_time_log "START TARBALL(prep) $(date -u)"
# Build exclusion list
# mktemp prints the full path of the file it creates under TEMP; without a
# usable path there is nothing to hand to tar's --exclude-from, so stop here.
EXCLUSION_LIST="$(mktemp -p "${TEMP}" snapshot-exclude.XXXXXXXXXX)" || exit 1
# NOTE(review): the exit status of print-exclusion-list.sh is not checked; a
# failure would leave an incomplete exclusion list -- confirm whether that
# should be fatal.
/usr/local/bin/mastermirror/print-exclusion-list.sh "${MASTER}" >"${EXCLUSION_LIST}"
write_time_log "END TARBALL(prep)		$(date -u)"

# 1b) Create the tarball
# Options shared by the old-style (portage-*) and new-style (gentoo-*) tar
# invocations. Each invocation appends its own --transform, --create, --file
# and source argument after these.
COMMON_TAR_OPTIONS=(
	# Force a small block size
	'--blocking-factor=1'
	'--record-size=512'
	# Tar format rationale:
	# ---------------------
	# Longest directory prefix, 94 chars:
	# gentoo-YYYYMMDD/profiles/default/linux/powerpc/ppc64/17.0/64bit-userland/little-endian/systemd/
	# Longest path, 140 chars:
	# gentoo-YYYYMMDD/gnome-extra/gnome-shell-extension-applications-overview-tooltip/gnome-shell-extension-applications-overview-tooltip-6.ebuild
	# Longest filename, 91 chars:
	# spirv-tools-2019.10_pre20191027-Respect-CMAKE_INSTALL_LIBDIR-in-installed-CMake-files.patch
	#
	# These lengths require that a tar format with unlimited length is used;
	# the split-255 layout of ustar is not guaranteed to be long enough.
	#
	# The tar formats with unlimited length are: gnutar, posix
	#
	# The posix tar format has additional 1K overhead per entry, and the Gentoo
	# repo has ~160k entries (files & dirs), which adds up to 160M extra in the
	# tarball.
	# The differ/bdelta toolchain is also confirmed to work on posix tar
	# format.
	# NOTE(review): gnu is what is selected below, presumably because of the
	# posix size overhead described above -- confirm.
	'--format=gnu'
	# Sorting by name produces consistent ordering and helps compression of
	# related content. Custom ordering might further improve ordering in future
	# (eg all metadata.xml first)
	'--sort=name'
	# Force ownership of content:
	'--owner=portage'
	'--group=portage'
	# Excluded content:
	# The exclusion file holds literal names, not glob patterns.
	# NOTE(review): GNU tar documents the wildcard-control options as applying
	# to the exclude options that follow them, so keep --no-wildcards ahead of
	# --exclude-from -- confirm against the tar manual before reordering.
	'--no-wildcards'
	"--exclude-from=${EXCLUSION_LIST}"
	# Do not capture any xattr/acl info at all.
	'--no-acls'
	'--no-xattrs'
	'--no-selinux'
	# Include a volume ID for tracing
	# (disabled: kept only as a record of why no label is written)
	# volume header is NOT supported by:
	# - Docker https://bugs.gentoo.org/631644
	# - tarsync https://bugs.gentoo.org/631616
	#"--label=${FILENAME%.bz2}"
	# do everything relative to the destination
	# (tar changes into MASTER, so the '.' source argument of each invocation
	# refers to the repository root)
	"--directory=${MASTER}"
)
# Create the old-style tarball (top-level directory 'portage') in the working
# directory, unless an earlier run of the same day already left one behind.
write_time_log "START TARBALL(tar,old)		$(date -u)"
if [ ! -f "${FILENAME%.bz2}" ]; then
	OLD_TARBALL_OPTIONS=(
		# The . needs to match the file argument
		"--transform=s,^\\.,portage,g"
		# The operation, destination, source arguments
		'--create'
		"--file=${FILENAME%.bz2}"
		.
	)
	tar \
		"${COMMON_TAR_OPTIONS[@]}" \
		"${OLD_TARBALL_OPTIONS[@]}"
	rc=$?
	if [ $rc -ne 0 ]; then
		echo "TARBALL(tar,old) failed!"
		echo "TARBALL(tar,old) failed!" 1>&2
		# Do not leave a partial tarball behind: the -f test above would make
		# a rerun skip tar and reuse the truncated file.
		rm -f "${FILENAME%.bz2}"
		exit 1
	fi
	# A freshly built tarball invalidates checksums left by an aborted run.
	rm -f "${FILENAME%.bz2}.bz2.umd5sum" "${FILENAME%.bz2}.xz.umd5sum"
fi
write_time_log "END TARBALL(tar,old)		$(date -u)"
write_time_log "START TARBALL(umd5,old)		$(date -u)"
# Checksum of the UNCOMPRESSED tarball, stored once per compressed variant
# (.bz2 and .xz) because step 4 publishes one .umd5sum next to each of them.
# The guard used to test the path " ${FILENAME}.umd5sum" (leading space), which
# never exists, so the checksum was recomputed on every run. Recompute only
# when either output file is missing.
if [ ! -f "${FILENAME}.umd5sum" ] || [ ! -f "${FILENAME%.bz2}.xz.umd5sum" ]; then
	md5sum "${FILENAME%.bz2}" | tee "${FILENAME%.bz2}.bz2.umd5sum" "${FILENAME%.bz2}.xz.umd5sum" >/dev/null
fi
write_time_log "END TARBALL(umd5,old)		$(date -u)"
# end 1b)

# 1c) Also create the new-style tarball.
# It is built by a second tar run over MASTER that differs from the old-style
# run only in its --transform and output file (see below for why it is no
# longer derived from the old tarball).
write_time_log "START TARBALL(tar,new) $(date -u)"
if [ ! -f "${FILENAME_NEW%.xz}" ]; then


	# The newer tarball differs in the following ways:
	# Filename 'portage-YYYYMMDD' -> 'gentoo-YYYYMMDD'
	# Path prefix 'portage' -> 'gentoo-YYYYMMDD'
	#
	# Earlier code used tar-transform-names.pl as a wrapper around Perl
	# Archive::Tar::Stream, but it was found that the Archive::Tar::Stream
	# codebase did not handle gnutar format correctly.
	# https://bugs.gentoo.org/703460
	#
	# While the stream processing was MUCH faster (because it didn't traverse
	# the filesystem at all), it broke on very long filenames that ARE present
	# in the Gentoo repo:
	#/usr/local/bin/mastermirror//tar-transform-names.pl \
	#	--input-file "${FILENAME%.bz2}" \
	#	--output-file "${FILENAME_NEW%.xz}" \
	#	--regex "s/^portage/${FILENAME_NEW%%.*}/"

	NEW_TARBALL_OPTIONS=(
		# The . needs to match the file argument
		"--transform=s,^\\.,${FILENAME_NEW%%.*},g"
		# The operation, destination, source arguments
		'--create'
		"--file=${FILENAME_NEW%.xz}"
		.
	)
	tar \
		"${COMMON_TAR_OPTIONS[@]}" \
		"${NEW_TARBALL_OPTIONS[@]}"
	rc=$?
	if [ $rc -ne 0 ]; then
		echo "TARBALL(tar,new) failed!"
		echo "TARBALL(tar,new) failed!" 1>&2
		# Do not leave a partial tarball behind: the -f test above would make
		# a rerun skip tar and reuse the truncated file.
		rm -f "${FILENAME_NEW%.xz}"
		exit 1
	fi
	# A freshly built tarball invalidates a checksum left by an aborted run.
	rm -f "${FILENAME_NEW}.umd5sum"
fi
write_time_log "END TARBALL(tar,new) $(date -u)"
write_time_log "START TARBALL(umd5,new)		$(date -u)"
# Checksum of the UNCOMPRESSED new-style tarball.
# The guard used to test the path " ${FILENAME_NEW}.umd5sum" (leading space),
# which never exists, so the checksum was recomputed on every run.
if [ ! -f "${FILENAME_NEW}.umd5sum" ]; then
	md5sum "${FILENAME_NEW%.xz}" > "${FILENAME_NEW}.umd5sum"
fi
write_time_log "END TARBALL(umd5,new)		$(date -u)"
# end 1c)

# end 1)
write_time_log "END TARBALL		$(date -u)"

# 2) Sanity check the tarball size and bail out if it appears abnormal.
# Compares the uncompressed size of today's old-style tarball with the
# uncompressed size of the previous snapshot.
write_time_log "START SIZE SANITY	$(date -u)"
current_size=$(stat -c '%s' "${FILENAME%.bz2}")
# An empty or non-numeric size means stat failed. Previously that sent expr
# and the numeric tests into syntax errors and the check was silently skipped;
# a tarball that cannot even be measured must not be published.
case "${current_size}" in
	''|*[!0-9]*)
		echo "Could not determine the size of ${FILENAME%.bz2}" 1>&2
		exit 1
		;;
esac
# Uncompressed size of the previous snapshot, chosen by file suffix.
# BZIP2_PROG/XZ_PROG are expanded unquoted on purpose: XZ_PROG may carry
# extra arguments ("xz -T n").
case "${previous_snapshot}" in
	*.bz2) previous_size=$($BZIP2_PROG -k -d -c "${previous_snapshot}" | wc -c) ;;
	*.xz) previous_size=$($XZ_PROG -k -d -c "${previous_snapshot}" | wc -c) ;;
	*.tar) previous_size=$(stat -c '%s' "${previous_snapshot}") ;;
	*) previous_size=0 ;;
esac
if (( current_size < previous_size )); then
	# Integer ratio previous/shrinkage: below 2 means the tarball lost more
	# than half its size, below 5 means it lost more than a fifth. The
	# shrinkage is strictly positive here, so the division is safe.
	size_difference=$(( previous_size - current_size ))
	difference_ratio=$(( previous_size / size_difference ))
	if (( difference_ratio < 2 )); then
		echo "Snapshot size has decreased by more than 50% in one day!!!"
		echo "${FILENAME} ${current_size} bytes"
		echo "${previous_snapshot} ${previous_size} bytes"
		exit 1
	elif (( difference_ratio < 5 )); then
		echo "Snapshot size has decreased by more than 20% in one day!!!"
		echo "${FILENAME} ${current_size} bytes"
		echo "${previous_snapshot} ${previous_size} bytes"
		# Make this non-fatal while we recover more space.
		#exit 1
	fi
fi
write_time_log "END SIZE SANITY		$(date -u)"
# end 2)

# 3) compress the tarballs (bzip2 and xz)
write_time_log "START COMPRESS		$(date -u)"
# This happens only AFTER the sanity check
# NICE, BZIP2_PROG and XZ_PROG are expanded unquoted on purpose: they hold a
# command plus arguments ("nice -n 10", possibly "xz -T n").
# 3a) bzip2 of old tarball
write_time_log "START COMPRESS(bzip2,old)		$(date -u)"
if [ ! -f "${FILENAME%.bz2}.bz2" ]; then
	# -k keeps the uncompressed tarball; it is still needed for the xz pass
	# and for delta generation. A failure is fatal, as it is for xz below:
	# step 4 cannot sign a file that was never produced.
	${NICE} $BZIP2_PROG -k9 "${FILENAME%.bz2}" || exit $?
fi
write_time_log "END COMPRESS(bzip2,old)		$(date -u)"
# 3b) xz of old tarball
write_time_log "START COMPRESS(xz,old) $(date -u)"
if [ ! -f "${FILENAME%.*}.xz" ] ; then
	# pixz, pxz, xz all differ in filename generation
	# xz: .tar -> .tar.xz
	# pixz: .tar -> .tpxz
	# pxz: .tar -> .txz
	#
	# To avoid this, be explicit by using IO.
	# The redirect creates the output before xz runs, so on failure remove
	# it: otherwise the -f test above would make a rerun skip compression and
	# sign a truncated file.
	${NICE} ${XZ_PROG} -9 -e <"${FILENAME%.*}" >"${FILENAME%.*}.xz" || {
		rc=$?
		rm -f "${FILENAME%.*}.xz"
		exit "${rc}"
	}
fi
write_time_log "END COMPRESS(xz,old)			$(date -u)"
# 3c) xz of new tarball
write_time_log "START COMPRESS(xz,new) $(date -u)"
if [ ! -f "${FILENAME_NEW%.*}.xz" ] ; then
	# Same explicit-IO approach and partial-output cleanup as in 3b.
	${NICE} ${XZ_PROG} -9 -e <"${FILENAME_NEW%.*}" >"${FILENAME_NEW%.*}.xz" || {
		rc=$?
		rm -f "${FILENAME_NEW%.*}.xz"
		exit "${rc}"
	}
fi
write_time_log "END COMPRESS(xz,new)			$(date -u)"
write_time_log "END COMPRESS		$(date -u)"
# end 3)

# 4) sign
# For each compressed tarball: publish the checksum of its uncompressed form
# (.umd5sum), compute the checksum of the compressed file (.md5sum), create an
# armored detached signature (.gpgsig), then move the three files into UPLOAD.
write_time_log "START SIGN		$(date -u)"
# The checksum paths used to be built as ${UPLOAD}${f} (no separator) while
# the final mv used ${UPLOAD}/, which only agree when UPLOAD ends in '/'.
# Normalise once so every path below is joined the same way.
upload_dir="${UPLOAD%/}/"
for f in "${FILENAME}" "${FILENAME%.*}".xz "${FILENAME_NEW}"; do
	if [ ! -f "${upload_dir}${f}.umd5sum" ]; then
		cp "${f}.umd5sum" "${upload_dir}${f}.umd5sum" || exit $?
		md5sum "$f" > "$f".md5sum || exit $?
	fi
	# An existing signature (from an interrupted earlier run) is reused.
	if [ ! -f "$f".gpgsig ]; then
		gpg --batch -u "${SIGNKEYID}" --armor --detach-sign \
			--output "$f".gpgsig "$f" || exit $?
	fi
	mv "$f" "$f".md5sum "$f".gpgsig "${upload_dir}" || exit $?
done
write_time_log "END SIGN		$(date -u)"
# end 4)

# 5) delta generation
# Delta is not generated for new filename at this time
# Builds a bdelta patch from the uncompressed previous snapshot to today's
# uncompressed old-style tarball, compresses it and publishes it together
# with its md5sum in DELTA_UPLOAD.
write_time_log "START DELTA		$(date -u)"
PATCH=snapshot-${DELTA_BASE}-${YESTERDAY}.patch.bz2
if [ ! -f "${PATCH}" ]; then
	# Uncompressed copy of the previous snapshot, kept in the working dir.
	SNAPSHOT_BASE="${TEMP}"/$(basename "${previous_snapshot%.bz2}")-snapshotbase
	# Each step runs only if the previous one succeeded, so a truncated base
	# or a failed differ can no longer end up as a published delta. Delta
	# generation stays best-effort: a failure is reported and the script goes
	# on to update the symlinks.
	# NOTE(review): this relies on /usr/bin/differ returning non-zero on
	# failure -- confirm.
	if ! {
		${NICE} ${BZIP2_PROG} -dkc "${previous_snapshot}" > "${SNAPSHOT_BASE}" &&
		/usr/bin/differ -f bdelta \
			"${SNAPSHOT_BASE}" \
			"${FILENAME%.bz2}" \
			"${PATCH%.bz2}" &&
		${NICE} ${BZIP2_PROG} -9 "${PATCH%.bz2}" &&
		md5sum "${PATCH}" > "${PATCH}.md5sum" &&
		chmod 644 "${PATCH}"{,.md5sum} &&
		mv "${PATCH}"{,.md5sum} "${DELTA_UPLOAD}"
	}; then
		echo "DELTA generation failed for ${PATCH}" 1>&2
	fi
	# The uncompressed base is only needed by differ; remove it right away
	# instead of leaving a full-size tarball copy in the working dir.
	rm -f "${SNAPSHOT_BASE}"
fi
write_time_log "END DELTA		$(date -u)"
# end 5)

# 6) create some symlinks
# Point the '<prefix>-latest.tar.<ext>' names at the files published in
# step 4: a symlink for the tarball, a symlink for its signature, and a
# rewritten copy of the md5sum file that names the 'latest' file instead of
# the dated one.
write_time_log "START SYMLINK		$(date -u)"
cd "${UPLOAD}" || exit $?
# One loop covers the old-style (portage-*) and new-style (gentoo-*) files;
# the prefix is whatever precedes the first '-' in the dated name.
for f in "${FILENAME}" "${FILENAME%.*}".xz "${FILENAME_NEW}"; do
	ext=${f##*.}
	latest="${f%%-*}-latest.tar.${ext}"
	# Link targets are bare file names, i.e. relative to the link's directory.
	ln -sf "$f" "${UPLOAD}${latest}" || exit $?
	rm -f "${UPLOAD}${latest}.md5sum" || exit $?
	sed "s/${f}\$/${latest}/" "${UPLOAD}${f}.md5sum" > \
		"${UPLOAD}${latest}.md5sum" || exit $?
	ln -sf "${f}".gpgsig "${UPLOAD}${latest}.gpgsig" || exit $?
done
write_time_log "END SYMLINK		$(date -u)"
# end 6)

# 7) tidy up
write_time_log "START CLEANUP		$(date -u)"

# Cleanup immediate stuff
# Paths are spelled out in full because the working directory is no longer
# TEMP at this point.
# EXCLUSION_LIST already holds the full path printed by mktemp; it used to be
# prefixed with TEMP a second time, which named a file that does not exist, so
# the list was never removed here.
rm -f \
	"${TEMP}"/orig \
	"${TEMP}"/"${FILENAME%.bz2}"{.bz2,.xz,}{,.umd5sum} \
	"${TEMP}"/"${FILENAME_NEW%.xz}"{.bz2,.xz,}{,.umd5sum} \
	"${EXCLUSION_LIST}" \
	|| exit $?

# Cleanup older stuff that might have leaked
# Only regular files directly inside TEMP that were last modified at least a
# day ago. The regex asks for six digits after the prefix, which the
# eight-digit date stamps used in the file names also satisfy.
/usr/bin/find \
	"${TEMP}" \
	-regextype egrep \
	-maxdepth 1 \
	-type f \
	-mtime +0 \
	\( \
		-regex '.*/(portage|gentoo)-[0-9]{6}.*' \
		-o \
		-name 'snapshot-exclude.*' \
		-o \
		-name orig \
	\) \
	-delete

# Cleanup old snapshots
# Regular files directly inside UPLOAD older than HISTORY days. -type f leaves
# the 'latest' symlinks alone, and -maxdepth 1 prevents descending into
# subdirectories.
/usr/bin/find \
	"${UPLOAD}" \
	-maxdepth 1 \
	-type f \
	-mtime +${HISTORY} \
	-delete

write_time_log "END CLEANUP		$(date -u)"
# end 7)

# 8) clean up old deltas
# Delta is not generated for new filename at this time
write_time_log "START CLEANUP DELTA	$(date -u)"
# Size in bytes of the bzip2 snapshot that was just published; it is passed to
# the helper as its third argument, after the delta directory and yesterday's
# date stamp.
published_bytes=$(stat -c '%s' "${UPLOAD}/${FILENAME}")
# shellcheck disable=SC2086 # unquoted as before: a failed stat yields no argument
/usr/local/bin/mastermirror/clean-old-deltas.py \
	"${DELTA_UPLOAD}" \
	"${YESTERDAY}" \
	${published_bytes} \
	> /dev/null
write_time_log "END CLEANUP DELTA	$(date -u)"
# end 8)
write_time_log "END ENTIRE SCRIPT	$(date -u)"