csvclean_bisnow.sh
· 934 B · Bash
Eredeti
# S3 bucket used for every listing, download and upload in this script.
bucket="sftp-5j8tg.bisnow.net"
#######################################
# List the top level of the S3 bucket named by $bucket.
# Globals:   bucket (read) - must be set and non-empty
# Arguments: none
# Outputs:   the output of `aws s3 ls` on stdout
# Returns:   the exit status of aws
#######################################
listfiles() {
  # Quoted so the bucket name always reaches aws as one argument; the :?
  # guard stops an unset bucket from silently becoming "aws s3 ls s3://".
  aws s3 ls "s3://${bucket:?bucket is not set}"
}
#######################################
# Print the keys of the report files whose listing date is on or after a
# given date.
# Arguments: $1 - earliest date to keep, as YYYY-MM-DD; compared as text
#                 against the first column of the listfiles output
# Outputs:   keys containing "Daily_Order_and_Drop_Information" on stdout,
#            one per line, in reverse sort order
# Returns:   the exit status of grep (non-zero when nothing matched)
#######################################
get_files_starting_from() {
  listfiles |
    awk -v ts="$1" '
      # Object lines carry at least date, time, size and key columns;
      # anything shorter (such as "PRE prefix/" lines) is not a file.
      NF >= 4 && $1 >= ts {
        # Drop the three leading columns instead of printing $4, so a key
        # that contains spaces is kept whole.
        if (sub(/^[^ ]+ +[^ ]+ +[0-9]+ /, "")) print
      }' |
    sort -r |
    grep "Daily_Order_and_Drop_Information"
}
# Start from an empty download directory so files left over from an earlier
# run are never scanned again.
rm -rf ./files
mkdir -p ./files

# Download every key printed by get_files_starting_from for 2025-07-01 or
# later, running up to 8 copies in parallel.
# -I {} already runs one aws command per input line; the former "-n 1" is
# mutually exclusive with -I and only made GNU xargs print a warning.
# The URL is quoted so the bucket name cannot be word-split by the shell.
get_files_starting_from "2025-07-01" |
  xargs -P 8 -I {} aws s3 cp "s3://$bucket/{}" ./files
# Scan every downloaded report and record in bad.txt the path of each file
# that has at least one row whose "Drop ID" is not made only of digits.

#######################################
# Count the lines csvgrep emits for the rows of a CSV file whose "Drop ID"
# does NOT match ^[0-9]+$ (-i inverts the match).
# Arguments: $1 - path of the CSV file to inspect
# Outputs:   the line count on stdout; csvgrep also emits the header row,
#            so a value greater than 1 means at least one bad row
#######################################
count_non_numeric_drop_id_lines() {
  csvcut -c "Campaign ID","PO Number","Campaign Name","Drop ID" "$1" |
    csvgrep -c "Drop ID" -r "^[0-9]+$" -i |
    wc -l
}

# Truncate (or create) the list; unlike a bare rm this does not print an
# error when bad.txt does not exist yet.
: >bad.txt
for f in ./files/*; do
  # An empty ./files leaves the glob unexpanded; skip that literal pattern.
  [ -e "$f" ] || continue
  # echo "FILE: $f"
  # Deliberately not named LINES: bash uses that name for the terminal
  # height and may overwrite it.
  bad_line_count=$(count_non_numeric_drop_id_lines "$f")

  if [ "$bad_line_count" -gt 1 ]; then
    echo "FILE: $f is missing a drop id"
    echo "$f" >>bad.txt
  fi
done
# Rebuild every file listed in bad.txt without its bad rows and upload the
# cleaned copy to the bucket under the same name.
rm -rf fixed
mkdir -p fixed
# Read bad.txt line by line on fd 3: unlike `for f in $(cat ...)` this keeps
# paths with spaces or glob characters intact, and it leaves stdin alone for
# the commands run inside the loop.
while IFS= read -r f <&3; do
  # Strip the literal leading "./files/"; the former sed pattern was not
  # anchored and its "." matched any character.
  filename=${f#./files/}
  # Keep the rows whose "Drop ID" is all digits. Upload only when csvgrep
  # succeeded, so a failed run never replaces the bucket object with an
  # empty or truncated file.
  if csvgrep -c "Drop ID" -r "^[0-9]+$" "$f" >"./fixed/$filename"; then
    aws s3 cp "./fixed/$filename" "s3://$bucket/$filename"
  else
    echo "FILE: $f could not be cleaned; skipping upload" >&2
  fi
done 3<./bad.txt
1 | bucket="sftp-5j8tg.bisnow.net" |
2 | |
3 | listfiles() { |
4 | aws s3 ls s3://$bucket |
5 | } |
6 | |
7 | get_files_starting_from() { |
8 | listfiles | |
9 | awk -v ts="$1" '$1 >= ts {print $4}' | |
10 | sort -r | |
11 | grep "Daily_Order_and_Drop_Information" |
12 | } |
13 | |
14 | rm -rf ./files |
15 | mkdir -p ./files |
16 | |
17 | get_files_starting_from "2025-07-01" | |
18 | xargs -P 8 -n 1 -I {} aws s3 cp s3://$bucket/{} ./files |
19 | |
20 | rm bad.txt |
21 | touch bad.txt |
22 | for f in ./files/*; do |
23 | # echo "FILE: $f" |
24 | LINES=$(csvcut -c "Campaign ID","PO Number","Campaign Name","Drop ID" "$f" | |
25 | csvgrep -c "Drop ID" -r "^[0-9]+$" -i | |
26 | wc -l) |
27 | |
28 | if [ "$LINES" -gt 1 ]; then |
29 | echo "FILE: $f is missing a drop id" |
30 | echo "$f" >>bad.txt |
31 | fi |
32 | done |
33 | |
34 | rm -rf fixed |
35 | mkdir -p fixed |
36 | for f in $(cat ./bad.txt); do |
37 | filename=$(echo "$f" | sed 's|./files/||') |
38 | csvgrep -c "Drop ID" -r "^[0-9]+$" "$f" >"./fixed/$filename" |
39 | aws s3 cp "./fixed/$filename" s3://$bucket/$filename |
40 | done |
41 |