/* File: fcomp.cc Author: Sudarshan S. Chawathe Date: 3rd March, 2003 Terms: GPL For the Maryland Programming Contest, 2003. For details, see the file fcomp-desc.tex, or http://www.cs.umd.edu/ fcomp: A simple implementation of front coding Copyright (C) 2003 Sudarshan S. Chawathe This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include #include // Uncomment the following lines if using APCS classes // #include "apvector.h" // #include "apmatrix.h" // #include "apstack.h" // #include "apqueue.h" #include "apstring.cpp" /* maximum number of chars in a name: */ #define NAMESIZ 500 /* maximum number of names in database: */ #define MAXNAMES 1000 #define min(a,b) (((a)<=(b) ? (a) : (b))) int name_comp(const void * n1, const void * n2); int encode_offset(int i, char * b); int decode_offset(char * b); void print_byte(char b); int common_prefix_len(char * a, char * b); void encode_db(char names[MAXNAMES][NAMESIZ + 2], int num_names); void decode_db(int offsets[MAXNAMES], char names[MAXNAMES][NAMESIZ + 2], int num_entries); void read_names(char names[MAXNAMES][NAMESIZ + 2], int num_names); void read_db(int offsets[MAXNAMES], char names[MAXNAMES][NAMESIZ + 2], int n); int decode_nybble_string(char * ns); //----------------------------------------------------------------------- // main //----------------------------------------------------------------------- int main(int argc, char * argv[]) { char action[3]; cin >> action; if(action[0] == 'e') { char names[MAXNAMES][NAMESIZ + 2]; int num_names; cin >> num_names; assert (num_names < MAXNAMES); read_names(names, num_names); encode_db(names, num_names); } else if(action[0] == 'd') { int offsets[MAXNAMES]; char names[MAXNAMES][NAMESIZ + 2]; int num_entries; cin >> num_entries; assert (num_entries < MAXNAMES); read_db(offsets, names, num_entries); decode_db(offsets, names, num_entries); } else assert(0); return 0; } void read_names(char names[MAXNAMES][NAMESIZ + 2], int num_names) { for(int i = 0; i < num_names; i++) { cin >> names[i]; } } void read_db(int offsets[MAXNAMES], char suffixes[MAXNAMES][NAMESIZ + 2], int n) { for(int i = 0; i < n; i++) { char ns1[5], ns2[5]; /* two nybbles as strings */ cin >> ns1; cin >> ns2; char b[3]; b[0] = decode_nybble_string(ns1)*16 + decode_nybble_string(ns2); if(b[0] == -128) { /* marker; must read two more bytes */ char ns3[5], ns4[5], ns5[5], ns6[5]; cin >> ns3; cin >> ns4; cin >> ns5; cin >> ns6; b[1] = decode_nybble_string(ns3)*16 + decode_nybble_string(ns4); b[2] = decode_nybble_string(ns5)*16 + decode_nybble_string(ns6); } offsets[i] = decode_offset(b); assert(offsets[i] != -32768); cin >> suffixes[i]; } } int decode_nybble_string(char * ns) { int res = 0; for(int i = 0; i < 4; i++) { res = 2*res + (ns[i] == '1' ? 1 : 0); } return res; } void encode_db(char names[MAXNAMES][NAMESIZ + 2], int num_names) { qsort(names, num_names, NAMESIZ + 2, name_comp); int i; int offsets[MAXNAMES]; /* differential offsets */ offsets[0] = 0; int prev_offset = 0; for(i = 1; i < num_names; i++) { int offset = common_prefix_len(names[i-1], names[i]); int diff_offset = offset - prev_offset; prev_offset = offset; offsets[i] = diff_offset; } cout << "d" << endl; cout << num_names << endl; int prefix_len = 0; for(i = 0; i < num_names; i++) { char b[3]; prefix_len += offsets[i]; assert(0 <= prefix_len); assert(prefix_len <= (int) strlen(names[i])); int num_bytes = encode_offset(offsets[i], b); for(int j = 0; j < num_bytes; j++) print_byte(b[j]); cout << names[i]+prefix_len << endl; } } void decode_db(int offsets[MAXNAMES], char names[MAXNAMES][NAMESIZ + 2], int num_entries) { cout << "e" << endl; cout << num_entries << endl; char prev_name[NAMESIZ + 1]; int prev_offset = 0; /* non-differential offset */ for(int i = 0; i < num_entries; i++) { int offset = prev_offset + offsets[i]; assert((0 <= offset) && (offset <= (int) strlen(prev_name))); char name[NAMESIZ + 1] = ""; strncpy(name, prev_name, offset); strncat(name, names[i], NAMESIZ); cout << name << endl; strncpy(prev_name, name, NAMESIZ + 1); prev_offset = offset; } } int name_comp(const void * in1, const void * in2) { char * a = (char*) in1; char * b = (char*) in2; assert(a != NULL && b != NULL); int m = strlen(a); int n = strlen(b); int i; for(i = 0; i < min(m,n); i++) { if(a[i] < b[i]) return -1; if(a[i] > b[i]) return 1; } return (i == m ? (i == n ? 0 : -1) : 1); } /* encode n in the three bytes to which b points; see fcomp-desc.tex; returns number of bytes written (1 or 3) */ int encode_offset(int n, char * b) { assert(-32767 <= n && n <= 32767); if(-127 <= n && n <= 127) { /* 1-byte coding */ b[0] = n; return 1; } else { /* 3-byte coding */ b[0] = (char) 0x80; /* marker byte */ b[1] = (char) (n >> 8) & 0xff; /* 2nd-least-significant byte */ b[2] = (char) n & 0xff; /* least-significant byte */ return 3; } } /* return the offset coded by the three bytes to which b points */ int decode_offset(char * b) { if(b[0] != -128) { /* 1-byte coding */ return b[0]; } else { /* 3-byte coding */ return (b[1]<<8) + b[2]; } } void print_byte(char b) { for(int i = 7; i >= 0; i--) { int bit_i = (b >> i) & 0x01; cout << (bit_i == 1) ? '1' : '0'; if(i==4) cout << " "; } cout << " "; } int common_prefix_len(char * a, char * b) { assert(a != NULL && b != NULL); int m = strlen(a); int n = strlen(b); int i; for(i = 0; i < min(m,n); i++) { if(a[i] != b[i]) return i; } return i; }