summaryrefslogtreecommitdiff
path: root/src/tools/chmorph.cxx
blob: 0faa8f0843e5acd4f3a16fc605b360e64ce45e70 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#include <cstring>
#include <cstdlib>
#include <cstdio>

#include "hunspell.hxx"
#include "textparser.hxx"

#ifndef W32
using namespace std;
#endif

/*
 * Build a newly malloc()ed copy of `text` in which the occurrence of `from`
 * located at `match` (a pointer into `text`, as returned by strstr) is
 * replaced by `to`.
 *
 * Returns NULL if the allocation fails; the caller owns the result and must
 * release it with free().
 */
static char *
splice_replacement(const char * text, const char * match,
                   const char * from, const char * to)
{
    const size_t prefix_len = match - text;
    const size_t from_len = strlen(from);
    const size_t to_len = strlen(to);
    const size_t tail_len = strlen(match + from_len);

    char * result = static_cast<char *>(malloc(prefix_len + to_len + tail_len + 1));
    if (!result) return NULL;

    memcpy(result, text, prefix_len);
    memcpy(result + prefix_len, to, to_len);
    /* tail_len + 1 also copies the terminating NUL */
    memcpy(result + prefix_len + to_len, match + from_len, tail_len + 1);
    return result;
}

/*
 * chmorph - change affixes by morphological analysis and generation.
 *
 * Usage: chmorph affix_file dictionary_file file_to_convert STRING1 STRING2
 *
 * Reads file_to_convert line by line, tokenizes each line, and for every
 * token whose morphological analysis contains STRING1 replaces the first
 * occurrence of STRING1 with STRING2 in that analysis, asks Hunspell to
 * generate a word form from the modified analyses, and substitutes the first
 * generated form for the token. Each processed line is written to stdout.
 *
 * Returns 0 on success; exits with status 1 on a usage error, when the input
 * file cannot be opened, or when memory runs out.
 */
int
main(int argc, char** argv)
{
    /* first parse the command line options: five arguments are required */
    if (argc < 6) {
        fprintf(stderr,
            "chmorph - change affixes by morphological analysis and generation\n"
            "correct syntax is:\nchmorph affix_file "
            "dictionary_file file_to_convert STRING1 STRING2\n"
            "STRINGS may be arbitrary parts of the morphological descriptions\n"
            "example: chmorph hu.aff hu.dic hu.txt SG_2 SG_3 "
            " (convert informal Hungarian second person texts to formal third person texts)\n");
        exit(1);
    }

    const char * from = argv[4];
    const char * to = argv[5];

    /* open the text file to convert */
    FILE * f = fopen(argv[3], "r");
    if (!f) {
        fprintf(stderr, "Error - could not open file to check\n");
        exit(1);
    }

    Hunspell * pMS = new Hunspell(argv[1], argv[2]);
    /* the argument lists the characters the parser treats as word characters
       (presumably -- confirm against textparser.hxx) */
    TextParser * p = new TextParser("qwertzuiopasdfghjklyxcvbnméáúõûóüöíQWERTZUIOPASDFGHJKLYXCVBNMÍÉÁÕÚÖÜÓÛ");

    char buf[MAXLNLEN];
    char * next;

    while (fgets(buf, MAXLNLEN, f)) {
        p->put_line(buf);
        while ((next = p->next_token())) {
            char ** pl;
            /* pln is used below as the element count of pl */
            int pln = pMS->analyze(&pl, next);
            if (pln) {
                int gen = 0; /* set once at least one analysis was rewritten */
                for (int i = 0; i < pln; i++) {
                    char * pos = strstr(pl[i], from);
                    if (!pos) continue;

                    char * r = splice_replacement(pl[i], pos, from, to);
                    if (!r) {
                        fprintf(stderr, "Error - out of memory\n");
                        exit(1);
                    }
                    /* swap the rewritten analysis into the list; the list is
                       released as a whole by free_list() below */
                    free(pl[i]);
                    pl[i] = r;
                    gen = 1;
                }
                if (gen) {
                    char ** pl2;
                    int pl2n = pMS->generate(&pl2, next, pl, pln);
                    if (pl2n) {
                        p->change_token(pl2[0]);
                        pMS->free_list(&pl2, pl2n);
                        // jump over the (possibly un)modified word: release the
                        // current token and fetch the following one, which the
                        // free() at the end of the loop body then releases
                        // (free(NULL) is harmless if there is none)
                        free(next);
                        next = p->next_token();
                    }
                }
                pMS->free_list(&pl, pln);
            }
            free(next);
        }
        /* NOTE(review): if get_line() returns a heap-allocated copy, it is
           never released here -- confirm against textparser.hxx */
        fprintf(stdout, "%s\n", p->get_line());
    }

    delete p;
    delete pMS;
    fclose(f);
    return 0;
}