![awk 셀 값을 기준으로 행 병합](https://linux55.com/image/118620/awk%20%EC%85%80%20%EA%B0%92%EC%9D%84%20%EA%B8%B0%EC%A4%80%EC%9C%BC%EB%A1%9C%20%ED%96%89%20%EB%B3%91%ED%95%A9.png)
awk를 사용하여 열 1~7의 값이 모두 일치하는 행들을 하나로 병합하고, 병합된 행의 열 8에는 해당 행들의 열 8에 있던 고유한 항목만 합쳐서 넣고 싶습니다.
입력은
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0014L5
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0015L5;0017L5
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0017L5;0019L5
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0019L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0089L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0090L5;0094L5;0089L5;0091L5;
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0091L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0060L5
원하는 출력
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0014L5;0015L5;0017L5;0019L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0089L5;0090L5;0094L5;0091L5;
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0078L5;0060L5
현재 아래 코드를 사용하고 있습니다.
awk -F' *, *' '{a[$1","$2","$3","$4","$5","$6","$7]=a[$1","$2","$3","$4","$5","$6","$7] "," $8} END {for (k in a) {print k a[k] | "sort -u"}}'
하지만 제대로 작동하지 않는 것 같습니다. 누구든지 어떤 제안을 해줄 수 있나요?
답변1
단일 gawk 프로세스:
awk -F, 'BEGIN{ PROCINFO["sorted_in"]="@ind_str_asc" }   # gawk: for-in visits keys in ascending string order
# Group key: the first seven fields, rejoined with the field separator.
{ k=$1 FS $2 FS $3 FS $4 FS $5 FS $6 FS $7 }
# Collect every 8th-field value seen for the key, separated by ";".
{ a[k]=a[k]? a[k]";"$8 : $8 }
END{
    for(i in a) {
        # Walk the pieces by numeric position instead of with for-in:
        # sorted_in also governs traversal of b, and string order would
        # visit index "10" before "2", scrambling first-seen order as soon
        # as a group has 10 or more pieces.
        n=split(a[i],b,";"); r="";
        # c remembers the pieces already emitted for this key.
        for (j=1; j<=n; j++) { if(!c[b[j]]++) r=(r=="")? b[j]: r";"b[j] }
        delete c; print i,r
    }
}' OFS=',' file
PROCINFO["sorted_in"]="@ind_str_asc"
- 배열 키/인덱스를 기준으로 비교/정렬합니다. 미리 정의된 배열 PROCINFO의 "sorted_in" 요소는 미리 정의된 값 집합 중 하나로 설정할 수 있습니다. 이러한 특수 값은 gawk 문서에 설명되어 있습니다: https://www.gnu.org/software/gawk/manual/gawk.html#Controlling-Scanning
k=$1 FS $2 FS $3 FS $4 FS $5 FS $6 FS $7
- 고유한 키를 만듭니다.
a[k]=a[k]? a[k]";"$8 : $8
- 각 그룹별로 8번째 필드 값을 누적합니다.
출력:
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0014L5;0015L5;0017L5;0019L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0089L5;0090L5;0094L5;0091L5;
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0078L5;0060L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
답변2
먼저 sort -u로 중복된 행을 제거한 다음 awk로 처리합니다.
awk -F',' -v OFS=, '
# The last field holds ";"-separated IDs; everything before it is the group key.
{
    ids=$NF; NF--        # dropping the last field rebuilds $0 from the remaining fields using OFS
    if (!known[$0]++) order[++groups]=$0     # remember keys in first-seen order
    n=split(ids,id,";")
    for (j=1; j<=n; j++) {
        # Skip empty pieces (e.g. from a trailing ";") and IDs already listed for this key.
        if (id[j]=="" || seen[$0,id[j]]++) continue
        # Join with ";" so the merged IDs stay inside a single CSV column.
        a[$0]=(a[$0]=="") ? id[j] : a[$0]";"id[j]
    }
}
# Print one line per key: the key fields, then the merged ID list.
END{ for (g=1; g<=groups; g++) print order[g], a[order[g]] }' <(sort -u infile)
답변3
첫 번째 변형
sed, awk 그리고 datamash를 사용합니다. 관심이 있고 코드가 원하는 대로 작동한다면 설명을 추가하겠습니다.
datamash의 구분 기호로 :(콜론)을 선택했기 때문에 입력 줄에 :(콜론)이 포함되어 있으면 안 됩니다. 물론 구분 기호는 다른 문자로 바꿀 수도 있습니다.
# Replace the 7th comma with ":" so each line becomes "<fields 1-7>:<field 8>",
# group on the first part with datamash, then de-duplicate the collected
# IDs in awk and strip the trailing "; " with the final sed.
sed 's/,/:/7' input.txt |
  datamash -t: -g 1 unique 2 |
  awk -F: '
    # $1 is the group key (fields 1-7); $2 is the list of 8th-field values
    # that datamash gathered for it, separated by "," and/or ";".
    {
      printf "%s,", $1
      total = split($2, ids, "[,;]")
      # Emit each ID only the first time it shows up on this line.
      for (pos = 1; pos <= total; pos++) {
        id = ids[pos]
        if (id in printed)
          continue
        printed[id] = 1
        printf "%s; ", id
      }
      print ""
      delete printed   # the next line starts with an empty "already printed" set
    }' |
  sed 's/[; ]*$//g'
출력
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0014L5; 0015L5; 0017L5; 0019L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0089L5; 0090L5; 0094L5; 0091L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0060L5; 0078L5
두 번째 변형 - 데이터매시 없음
# Variant without datamash; relies on gawk arrays of arrays (main_arr[key][id]).
# The first sed expression turns the 7th comma into ":" so each line splits into
# "fields 1-7" and "field 8"; the second removes a ";" at the very end of the
# line together with any whitespace before it (\s is a GNU sed extension).
sed 's/,/:/7; s/\s*;$//' input.txt |
awk -F: '
# Per input line: record every ";"-separated piece of the 8th field under its
# 7-field key. Using the piece as a subscript collapses duplicates.
{
size = split($2, arr_eight_field, ";");
for(i = 1; i <= size; i++) {
main_arr[$1][arr_eight_field[i]] = 1;
}
}
# After all input: print each key once, followed by its pieces joined with "; ".
END {
for(seven_fields in main_arr) {
eight = "";
# for-in visits subscripts in an unspecified order, so the pieces are not
# necessarily printed in the order they appeared in the input.
for(i in main_arr[seven_fields]) {
eight = (eight) ? eight "; " i : i;
}
print seven_fields "," eight;
}
}' | sort
출력
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0017L5; 0015L5; 0019L5; 0014L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0090L5; 0089L5; 0091L5; 0094L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0060L5; 0078L5