awk 패턴 일치 및 병합 파일

awk 패턴 일치 및 병합 파일

1.csv, 2.csv 및 3.csv의 3개 파일이 있습니다.

1.csv

TELECOM_DEVELOPMENT_AFGHANISTAN_COMPANY,AFGHANISTAN,Alphanumeric_A_MSISDN_blocking,1  
CABLE&WIRELESS_BARBADOS,BARBADOS,Alphanumeric_A_MSISDN_blocking,791  
SIMINN_ICELAND_TELECOM,ICELAND,Alphanumeric_A_MSISDN_blocking,109373  
CABLE&WIRELESS_SEYCHELLES,SEYCHELLES,Alphanumeric_A_MSISDN_blocking,2  
CABLE&WIRELESS_JAMAICA,JAMAICA,Alphanumeric_A_MSISDN_blocking,85  

2.csv

SIMINN_ICELAND_TELECOM,ICELAND,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),7795  
CABLE&WIRELESS_SEYCHELLES,SEYCHELLES,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),638

3.csv:

TELECOM_DEVELOPMENT_AFGHANISTAN_COMPANY,AFGHANISTAN,Calling_Party_Address_Blocking,79  
CABLE&WIRELESS_BARBADOS,BARBADOS,Calling_Party_Address_Blocking,30  
MOBILKOM_LIECHTENSTEIN,LIECHTENSTEIN,Calling_Party_Address_Blocking,6
SYNIVERSE_ANSI,UNITED_STATES,Calling_Party_Address_Blocking,12

아래와 같이 출력 파일을 인쇄하도록 파일을 병합하고 싶습니다.

TELECOM_DEVELOPMENT_AFGHANISTAN_COMPANY,AFGHANISTAN,Alphanumeric_A_MSISDN_blocking,1,NA,NA,Calling_Party_Address_Blocking,79
CABLE&WIRELESS_BARBADOS,BARBADOS,Alphanumeric_A_MSISDN_blocking,791,NA,NA,Calling_Party_Address_Blocking,30
SIMINN_ICELAND_TELECOM,ICELAND,Alphanumeric_A_MSISDN_blocking,109373,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),7795,NA,NA
CABLE&WIRELESS_SEYCHELLES,SEYCHELLES,Alphanumeric_A_MSISDN_blocking,2,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),638,NA,NA
CABLE&WIRELESS_JAMAICA,JAMAICA,Alphanumeric_A_MSISDN_blocking,85,NA,NA,NA,NA 
MOBILKOM_LIECHTENSTEIN,LIECHTENSTEIN,NA,NA,NA,NA,Calling_Party_Address_Blocking,6
SYNIVERSE_ANSI,UNITED_STATES,NA,NA,NA,NA,Calling_Party_Address_Blocking,12

답변1

출력 순서가 상관없다면 perl을 사용한 해결 방법은 다음과 같습니다.

$ perl -F',' -anle '
    # Collect the value columns (field 3 onward) per "name,country" key and
    # per source file, so every key remembers what each input contributed.
    push @{ $h{"$F[0],$F[1]"}{$ARGV} }, @F[2..$#F];
    # eof without parentheses is true on the last line of the current file.
    # Recording $ARGV here yields the files in command-line order, which is
    # the column order of the output; no sorting of file names is needed
    # (a numeric sort would treat every non-numeric name as 0).
    push @file, $ARGV if eof;
    END {
        # keys %h has no defined order, so rows are printed in arbitrary order.
        for my $k (keys %h) {
            # One column group per input file; NA,NA stands in for a file
            # that has no line for this key.
            my @row = map {
                defined($h{$k}{$_}) ? @{ $h{$k}{$_} } : qw(NA NA)
            } @file;
            print join ",", ($k, @row);
        }
    }
' 1.csv 2.csv 3.csv
MOBILKOM_LIECHTENSTEIN,LIECHTENSTEIN,NA,NA,NA,NA,Calling_Party_Address_Blocking,6
CABLE&WIRELESS_SEYCHELLES,SEYCHELLES,Alphanumeric_A_MSISDN_blocking,2,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),638,NA,NA
CABLE&WIRELESS_BARBADOS,BARBADOS,Alphanumeric_A_MSISDN_blocking,791,NA,NA,Calling_Party_Address_Blocking,30
SIMINN_ICELAND_TELECOM,ICELAND,Alphanumeric_A_MSISDN_blocking,109373,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),7795,NA,NA
CABLE&WIRELESS_JAMAICA,JAMAICA,Alphanumeric_A_MSISDN_blocking,85,NA,NA,NA,NA
SYNIVERSE_ANSI,UNITED_STATES,NA,NA,NA,NA,Calling_Party_Address_Blocking,12
TELECOM_DEVELOPMENT_AFGHANISTAN_COMPANY,AFGHANISTAN,Alphanumeric_A_MSISDN_blocking,1,NA,NA,Calling_Party_Address_Blocking,79

답변2

awk를 사용하는 방법:

#!/usr/bin/awk -f

# Merge comma-separated lines that share the same first field.
# The first line seen for a key is stored whole; for every later line with
# the same key, fields 3..NF are appended to the stored line. No placeholder
# is inserted for inputs that lack a key.

BEGIN { FS = "," }

{
    key = $1
    if (!(key in merged)) {
        # First occurrence: keep the entire line as the start of the row.
        merged[key] = $0
        next
    }
    # Later occurrence: append only the value columns.
    for (col = 3; col <= NF; col++)
        merged[key] = merged[key] "," $col
}

END {
    # for-in traversal order is unspecified, so the row order is arbitrary.
    for (key in merged)
        print merged[key]
}

merge.awk라는 이름으로 저장하고 chmod +x merge.awk로 실행 가능하게 만듭니다.

다음과 같이 출력되어야 합니다.

CABLE&WIRELESS_SEYCHELLES,SEYCHELLES,Alphanumeric_A_MSISDN_blocking,2,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),638
SYNIVERSE_ANSI,UNITED_STATES,Calling_Party_Address_Blocking,12
MOBILKOM_LIECHTENSTEIN,LIECHTENSTEIN,Calling_Party_Address_Blocking,6
CABLE&WIRELESS_JAMAICA,JAMAICA,Alphanumeric_A_MSISDN_blocking,85
SIMINN_ICELAND_TELECOM,ICELAND,Alphanumeric_A_MSISDN_blocking,109373,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),7795
TELECOM_DEVELOPMENT_AFGHANISTAN_COMPANY,AFGHANISTAN,Alphanumeric_A_MSISDN_blocking,1,Calling_Party_Address_Blocking,79
CABLE&WIRELESS_BARBADOS,BARBADOS,Alphanumeric_A_MSISDN_blocking,791,Calling_Party_Address_Blocking,30

./merge.awk 1.csv 2.csv 3.csv 또는 ./merge.awk *.csv와 같이 실행합니다.

답변3

awk -F ',' '
  # Every line is keyed by "name,country" (fields 1 and 2).
  { key = $1 FS $2 }

  # Record each key exactly once, in order of first appearance across all
  # files, so END can print one row per key in input order. A single seen
  # set avoids duplicate rows when a key is missing from the first file but
  # present in both later files, or repeated within one file.
  !(key in seen) { seen[key] = 1; a[++n] = key }

  # Store the value pair (fields 3 and 4) in the array of the file it came from.
  FILENAME == ARGV[1] { ar1[key] = ($3 FS $4); next }
  FILENAME == ARGV[2] { ar2[key] = ($3 FS $4); next }
  FILENAME == ARGV[3] { ar3[key] = ($3 FS $4); next }

  END {
    for (i = 1; i <= n; i++) {
      k = a[i]
      # NA,NA fills the column pair of any file that has no line for this key.
      v1 = (k in ar1) ? ar1[k] : "NA,NA"
      v2 = (k in ar2) ? ar2[k] : "NA,NA"
      v3 = (k in ar3) ? ar3[k] : "NA,NA"
      printf "%s,%s,%s,%s\n", k, v1, v2, v3
    }
  }' 1.csv 2.csv 3.csv

위 명령의 출력은 다음과 같습니다.

TELECOM_DEVELOPMENT_AFGHANISTAN_COMPANY,AFGHANISTAN,Alphanumeric_A_MSISDN_blocking,1,NA,NA,Calling_Party_Address_Blocking,79
CABLE&WIRELESS_BARBADOS,BARBADOS,Alphanumeric_A_MSISDN_blocking,791,NA,NA,Calling_Party_Address_Blocking,30
SIMINN_ICELAND_TELECOM,ICELAND,Alphanumeric_A_MSISDN_blocking,109373,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),7795,NA,NA
CABLE&WIRELESS_SEYCHELLES,SEYCHELLES,Alphanumeric_A_MSISDN_blocking,2,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),638,NA,NA
CABLE&WIRELESS_JAMAICA,JAMAICA,Alphanumeric_A_MSISDN_blocking,85,NA,NA,NA,NA
MOBILKOM_LIECHTENSTEIN,LIECHTENSTEIN,NA,NA,NA,NA,Calling_Party_Address_Blocking,6
SYNIVERSE_ANSI,UNITED_STATES,NA,NA,NA,NA,Calling_Party_Address_Blocking,12

관련 정보