awk 셀 값을 기준으로 행 병합

awk 셀 값을 기준으로 행 병합

awk를 사용하여 열 1,2,3,4,5,6,7과 일치하는 행을 병합하고 그 중 열 8의 고유 항목을 병합하고 싶습니다.

입력은

AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0014L5
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0015L5;0017L5
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0017L5;0019L5
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0019L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0089L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0090L5;0094L5;0089L5;0091L5;
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0091L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0060L5

원하는 출력

AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0014L5;0015L5;0017L5;0019L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0089L5;0090L5;0094L5;0091L5;
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0078L5;0060L5

현재 아래 코드를 사용하고 있습니다.

awk -F' *, *' '{a[$1","$2","$3","$4","$5","$6","$7]=a[$1","$2","$3","$4","$5","$6","$7] "," $8} END {for (k in a) {print k a[k] | "sort -u"}}'

하지만 제대로 작동하지 않는 것 같습니다. 누구든지 어떤 제안을 해줄 수 있나요?

답변1

싱글로멍하니프로세스:

awk -F, 'BEGIN{ PROCINFO["sorted_in"]="@ind_str_asc" }
        { k=$1 FS $2 FS $3 FS $4 FS $5 FS $6 FS $7 }
        { a[k]=a[k]? a[k]";"$8 : $8 }
        END{ 
            for(i in a) { 
                split(a[i],b,";"); r=""; 
                for (j in b) { if(!c[b[j]]++) r=(r=="")? b[j]: r";"b[j] }
                delete c; print i,r 
            } 
        }' OFS=',' file
  • PROCINFO["sorted_in"]="@ind_str_asc"- 배열 키/인덱스를 기준으로 비교/정렬합니다. 미리 정의된 배열을 PROCINFO미리 정의된 값 집합 중 하나로 설정할 수 있습니다. 이러한 특수 값은 여기에 설명되어 있습니다(문서).https://www.gnu.org/software/gawk/manual/gawk.html#Controlling-Scanning

  • k=$1 FS $2 FS $3 FS $4 FS $5 FS $6 FS $7- 고유한 키를 만드세요

  • a[k]=a[k]? a[k]";"$8 : $8- 각 필드별로 8번째 필드 값을 누적그룹


산출:

AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0014L5;0015L5;0017L5;0019L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0089L5;0090L5;0094L5;0091L5;
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0078L5;0060L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5

답변2

먼저 이를 고유화 sort -u한 다음 awk.

awk -F',' -v OFS=, '{cpyNF=$NF;NF--;a[$0]=a[$0]","cpyNF} 
    END{for (i in a) print i""a[i]}' <(sort -u infile)

답변3

첫 번째 변종

sed, awk그리고 datamash사용됩니다. 관심이 있고 코드가 원하는 대로 작동한다면 설명을 추가할 수 있습니다. :나는 (콜론)을 구분 기호로 선택했기 때문에 줄에 (콜론)을 포함해서는 안 됩니다 datamash. 물론 다른 것으로 변경될 수도 있습니다.

sed 's/,/:/7' input.txt |
datamash -t: -g 1 unique 2 | 
awk -F: '{
    printf "%s,", $1;

    size = split($2, arr, "[,;]");

    for(i = 1; i <= size; i++) {
        if(!unique_arr[arr[i]]) {
            printf "%s; ", arr[i];
            unique_arr[arr[i]] = 1;
        }
    }
    print "";
    delete unique_arr;
}' | sed 's/[; ]*$//g'

산출

AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0014L5; 0015L5; 0017L5; 0019L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0089L5; 0090L5; 0094L5; 0091L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0060L5; 0078L5

두 번째 변형 - 데이터매시 없음

sed 's/,/:/7; s/\s*;$//' input.txt |
awk -F: '
{
    size = split($2, arr_eight_field, ";"); 
    for(i = 1; i <= size; i++) {
        main_arr[$1][arr_eight_field[i]] = 1;
    }
}
END {
    for(seven_fields in main_arr) {
        eight = "";
        for(i in main_arr[seven_fields]) {
            eight = (eight) ? eight "; " i : i;
        }
        print seven_fields "," eight;
    }
}' | sort

산출

AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0017L5; 0015L5; 0019L5; 0014L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0090L5; 0089L5; 0091L5; 0094L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0060L5; 0078L5

관련 정보