From 96a2e811f5333b2cf22edf40c977863e18e72e48 Mon Sep 17 00:00:00 2001 From: Mahmoud Said Date: Fri, 23 Aug 2013 08:06:06 +0200 Subject: [PATCH 1/2] add the new tags in pages-articles.xml that were not supported by xml2sql --- keywords | 12 +++++ mediawiki.c | 150 +++++++++++++++++++++++++++++----------------------- 2 files changed, 96 insertions(+), 66 deletions(-) diff --git a/keywords b/keywords index c362860..245e8d1 100644 --- a/keywords +++ b/keywords @@ -20,6 +20,12 @@ enum element { el_minor, el_comment, el_text, + el_ns, + el_parentid, + el_sha1, + el_model, + el_format, + el_redirect }; %} struct eltmap { char *name; enum element t; }; @@ -44,3 +50,9 @@ ip, el_ip minor, el_minor comment, el_comment text, el_text +ns, el_ns +parentid, el_parentid +sha1, el_sha1 +model, el_model +format, el_format +redirect, el_redirect \ No newline at end of file diff --git a/mediawiki.c b/mediawiki.c index fbfffe0..24c422a 100644 --- a/mediawiki.c +++ b/mediawiki.c @@ -1,4 +1,4 @@ -/* C code produced by gperf version 3.0.1 */ +/* C code produced by gperf version 3.0.3 */ /* Command-line: gperf -gptoC -Nlu_elt keywords */ /* Computed positions: -k'1,$' */ @@ -52,16 +52,22 @@ enum element { el_minor, el_comment, el_text, + el_ns, + el_parentid, + el_sha1, + el_model, + el_format, + el_redirect }; -#line 25 "keywords" +#line 31 "keywords" struct eltmap { char *name; enum element t; }; -#define TOTAL_KEYWORDS 20 +#define TOTAL_KEYWORDS 26 #define MIN_WORD_LENGTH 2 #define MAX_WORD_LENGTH 12 #define MIN_HASH_VALUE 4 -#define MAX_HASH_VALUE 42 -/* maximum key range = 39, duplicates = 0 */ +#define MAX_HASH_VALUE 39 +/* maximum key range = 36, duplicates = 0 */ #ifdef __GNUC__ __inline @@ -77,38 +83,41 @@ hash (str, len) { static const unsigned char asso_values[] = { - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 3, 15, - 5, 0, 43, 0, 43, 20, 43, 43, 43, 5, - 5, 15, 20, 43, 0, 0, 0, 10, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43 + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 30, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 3, 5, + 15, 0, 0, 25, 40, 15, 40, 40, 5, 5, + 15, 15, 10, 40, 0, 5, 0, 10, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40 }; return len + asso_values[(unsigned char)str[len - 1]] + asso_values[(unsigned char)str[0]]; } #ifdef __GNUC__ __inline +#ifdef __GNUC_STDC_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif #endif const struct eltmap * lu_elt (str, len) @@ -118,54 +127,63 @@ lu_elt (str, len) static const struct eltmap wordlist[] = { {""}, {""}, {""}, {""}, -#line 46 "keywords" +#line 52 "keywords" {"text", el_text}, -#line 36 "keywords" +#line 42 "keywords" {"title", el_title}, - {""}, -#line 30 "keywords" +#line 57 "keywords" + {"format", el_format}, +#line 36 "keywords" {"base", el_base}, -#line 29 "keywords" - {"sitename", el_sitename}, -#line 31 "keywords" - {"generator", el_generator}, -#line 44 "keywords" +#line 58 "keywords" + {"redirect", el_redirect}, +#line 38 "keywords" + {"case", el_case}, +#line 50 "keywords" {"minor", el_minor}, {""}, -#line 38 "keywords" +#line 51 "keywords" + {"comment", el_comment}, +#line 35 "keywords" + {"sitename", el_sitename}, +#line 41 "keywords" + {"page", el_page}, +#line 56 "keywords" + {"model", el_model}, +#line 47 "keywords" + {"contributor", el_contributor}, +#line 44 "keywords" {"restrictions", el_restrictions}, -#line 39 "keywords" - {"revision", el_revision}, -#line 34 "keywords" - {"namespace", el_namespace}, -#line 33 "keywords" - {"namespaces", el_namespaces}, - {""}, {""}, -#line 42 "keywords" +#line 48 "keywords" {"username", el_username}, -#line 32 "keywords" - {"case", el_case}, +#line 46 "keywords" + {"timestamp", el_timestamp}, {""}, {""}, +#line 53 "keywords" + {"ns", el_ns}, #line 45 "keywords" - {"comment", el_comment}, -#line 28 "keywords" + {"revision", el_revision}, +#line 40 "keywords" + {"namespace", el_namespace}, + {""}, {""}, +#line 49 "keywords" + {"ip", el_ip}, +#line 34 "keywords" {"siteinfo", el_siteinfo}, -#line 35 "keywords" - {"page", el_page}, +#line 33 "keywords" + {"mediawiki", el_mediawiki}, +#line 39 "keywords" + {"namespaces", el_namespaces}, {""}, -#line 41 "keywords" - {"contributor", el_contributor}, -#line 37 "keywords" +#line 43 "keywords" {"id", el_id}, - {""}, -#line 40 "keywords" - {"timestamp", el_timestamp}, +#line 54 "keywords" + {"parentid", el_parentid}, +#line 37 "keywords" + {"generator", el_generator}, {""}, {""}, {""}, {""}, -#line 27 "keywords" - {"mediawiki", el_mediawiki}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, -#line 43 "keywords" - {"ip", el_ip} +#line 55 "keywords" + {"sha1", el_sha1} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) From ac082ec9353885c8391217033199384790340efa Mon Sep 17 00:00:00 2001 From: Matthew Francis-Landau Date: Thu, 26 Mar 2015 15:00:07 -0700 Subject: [PATCH 2/2] add in new dbname tag --- keywords | 6 ++-- mediawiki.c | 94 ++++++++++++++++++++++++++++------------------------- 2 files changed, 53 insertions(+), 47 deletions(-) diff --git a/keywords b/keywords index 245e8d1..8a85e26 100644 --- a/keywords +++ b/keywords @@ -25,7 +25,8 @@ enum element { el_sha1, el_model, el_format, - el_redirect + el_redirect, + el_dbname }; %} struct eltmap { char *name; enum element t; }; @@ -55,4 +56,5 @@ parentid, el_parentid sha1, el_sha1 model, el_model format, el_format -redirect, el_redirect \ No newline at end of file +redirect, el_redirect +dbname, el_dbname diff --git a/mediawiki.c b/mediawiki.c index 24c422a..ef1828e 100644 --- a/mediawiki.c +++ b/mediawiki.c @@ -1,4 +1,4 @@ -/* C code produced by gperf version 3.0.3 */ +/* C code produced by gperf version 3.0.4 */ /* Command-line: gperf -gptoC -Nlu_elt keywords */ /* Computed positions: -k'1,$' */ @@ -57,12 +57,13 @@ enum element { el_sha1, el_model, el_format, - el_redirect + el_redirect, + el_dbname }; -#line 31 "keywords" +#line 32 "keywords" struct eltmap { char *name; enum element t; }; -#define TOTAL_KEYWORDS 26 +#define TOTAL_KEYWORDS 27 #define MIN_WORD_LENGTH 2 #define MAX_WORD_LENGTH 12 #define MIN_HASH_VALUE 4 @@ -87,14 +88,14 @@ hash (str, len) 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 30, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 23, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 40, 40, 40, 40, 40, 40, 40, 40, 3, 5, - 15, 0, 0, 25, 40, 15, 40, 40, 5, 5, - 15, 15, 10, 40, 0, 5, 0, 10, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 30, 5, + 0, 0, 5, 20, 40, 25, 40, 40, 5, 5, + 15, 20, 10, 40, 0, 5, 0, 20, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, @@ -115,7 +116,7 @@ hash (str, len) #ifdef __GNUC__ __inline -#ifdef __GNUC_STDC_INLINE__ +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ __attribute__ ((__gnu_inline__)) #endif #endif @@ -127,63 +128,66 @@ lu_elt (str, len) static const struct eltmap wordlist[] = { {""}, {""}, {""}, {""}, -#line 52 "keywords" +#line 53 "keywords" {"text", el_text}, -#line 42 "keywords" +#line 43 "keywords" {"title", el_title}, -#line 57 "keywords" - {"format", el_format}, -#line 36 "keywords" - {"base", el_base}, -#line 58 "keywords" +#line 60 "keywords" + {"dbname", el_dbname}, + {""}, +#line 59 "keywords" {"redirect", el_redirect}, -#line 38 "keywords" +#line 39 "keywords" {"case", el_case}, -#line 50 "keywords" - {"minor", el_minor}, - {""}, #line 51 "keywords" + {"minor", el_minor}, +#line 58 "keywords" + {"format", el_format}, +#line 52 "keywords" {"comment", el_comment}, -#line 35 "keywords" +#line 36 "keywords" {"sitename", el_sitename}, -#line 41 "keywords" +#line 42 "keywords" {"page", el_page}, -#line 56 "keywords" +#line 57 "keywords" {"model", el_model}, -#line 47 "keywords" +#line 48 "keywords" {"contributor", el_contributor}, -#line 44 "keywords" +#line 45 "keywords" {"restrictions", el_restrictions}, -#line 48 "keywords" - {"username", el_username}, -#line 46 "keywords" +#line 55 "keywords" + {"parentid", el_parentid}, +#line 47 "keywords" {"timestamp", el_timestamp}, {""}, {""}, -#line 53 "keywords" +#line 54 "keywords" {"ns", el_ns}, -#line 45 "keywords" +#line 46 "keywords" {"revision", el_revision}, -#line 40 "keywords" +#line 41 "keywords" {"namespace", el_namespace}, {""}, {""}, +#line 44 "keywords" + {"id", el_id}, #line 49 "keywords" - {"ip", el_ip}, -#line 34 "keywords" - {"siteinfo", el_siteinfo}, -#line 33 "keywords" - {"mediawiki", el_mediawiki}, -#line 39 "keywords" + {"username", el_username}, +#line 38 "keywords" + {"generator", el_generator}, +#line 40 "keywords" {"namespaces", el_namespaces}, {""}, -#line 43 "keywords" - {"id", el_id}, -#line 54 "keywords" - {"parentid", el_parentid}, +#line 56 "keywords" + {"sha1", el_sha1}, +#line 35 "keywords" + {"siteinfo", el_siteinfo}, #line 37 "keywords" - {"generator", el_generator}, - {""}, {""}, {""}, {""}, -#line 55 "keywords" - {"sha1", el_sha1} + {"base", el_base}, + {""}, {""}, +#line 50 "keywords" + {"ip", el_ip}, + {""}, +#line 34 "keywords" + {"mediawiki", el_mediawiki} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)