Browse Source

Merge Onigmo 6.0.0

* https://github.com/k-takata/Onigmo/blob/Onigmo-6.0.0/HISTORY
* fix for ruby 2.4: https://github.com/k-takata/Onigmo/pull/78
* suppress warning: https://github.com/k-takata/Onigmo/pull/79
* include/ruby/oniguruma.h: include onigmo.h.
* template/encdb.h.tmpl: ignore the duplicated definition of EUC-CN in
  enc/euc_kr.c. It is already defined in enc/gb2312.c with a CRuby macro.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@57045 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
pull/1483/merge
naruse 3 years ago
parent
commit
2873edeafb
57 changed files with 3108 additions and 3183 deletions
  1. +2
    -0
      NEWS
  2. +6
    -3
      enc/ascii.c
  3. +3
    -3
      enc/big5.c
  4. +1
    -1
      enc/cp949.c
  5. +2
    -2
      enc/emacs_mule.c
  6. +2
    -2
      enc/euc_jp.c
  7. +26
    -1
      enc/euc_kr.c
  8. +1
    -1
      enc/euc_tw.c
  9. +1
    -2
      enc/gb18030.c
  10. +1
    -1
      enc/gbk.c
  11. +17
    -16
      enc/iso_8859_1.c
  12. +21
    -20
      enc/iso_8859_10.c
  13. +1
    -1
      enc/iso_8859_11.c
  14. +21
    -20
      enc/iso_8859_13.c
  15. +25
    -25
      enc/iso_8859_14.c
  16. +24
    -23
      enc/iso_8859_15.c
  17. +23
    -23
      enc/iso_8859_16.c
  18. +17
    -17
      enc/iso_8859_2.c
  19. +22
    -21
      enc/iso_8859_3.c
  20. +15
    -14
      enc/iso_8859_4.c
  21. +11
    -11
      enc/iso_8859_5.c
  22. +1
    -1
      enc/iso_8859_6.c
  23. +24
    -24
      enc/iso_8859_7.c
  24. +1
    -1
      enc/iso_8859_8.c
  25. +24
    -23
      enc/iso_8859_9.c
  26. +1
    -2
      enc/koi8_r.c
  27. +1
    -1
      enc/koi8_u.c
  28. +32
    -10
      enc/mktable.c
  29. +2
    -2
      enc/shift_jis.c
  30. +125
    -123
      enc/unicode.c
  31. +6
    -3
      enc/us_ascii.c
  32. +1
    -1
      enc/utf_16be.c
  33. +1
    -1
      enc/utf_16le.c
  34. +1
    -2
      enc/utf_32be.c
  35. +1
    -1
      enc/utf_32le.c
  36. +8
    -6
      enc/utf_8.c
  37. +18
    -17
      enc/windows_1250.c
  38. +18
    -17
      enc/windows_1251.c
  39. +15
    -14
      enc/windows_1252.c
  40. +22
    -21
      enc/windows_1253.c
  41. +24
    -23
      enc/windows_1254.c
  42. +25
    -25
      enc/windows_1257.c
  43. +2
    -2
      enc/windows_31j.c
  44. +934
    -0
      include/ruby/onigmo.h
  45. +4
    -876
      include/ruby/oniguruma.h
  46. +1
    -2
      re.c
  47. +276
    -307
      regcomp.c
  48. +34
    -33
      regenc.c
  49. +75
    -61
      regenc.h
  50. +21
    -44
      regerror.c
  51. +450
    -445
      regexec.c
  52. +174
    -242
      regint.h
  53. +491
    -608
      regparse.c
  54. +29
    -27
      regparse.h
  55. +8
    -7
      regsyntax.c
  56. +2
    -1
      template/encdb.h.tmpl
  57. +14
    -3
      tool/enc-unicode.rb

+ 2
- 0
NEWS View File

@@ -138,6 +138,8 @@ with all sufficient information, see the ChangeLog file or Redmine
* meta character \X matches Unicode 9.0 characters with some workarounds
for UTR #51 Unicode Emoji, Version 4.0 emoji zwj sequences.

* Update Onigmo 6.0.0.

* Regexp/String: Updated Unicode version from 8.0.0 to 9.0.0 [Feature #12513]

* RubyVM::Env


+ 6
- 3
enc/ascii.c View File

@@ -29,9 +29,12 @@
*/

#include "regenc.h"
#include "encindex.h"
#ifdef RUBY
# include "encindex.h"
#endif

#ifndef ENCINDEX_ASCII
#define ENCINDEX_ASCII 0
# define ENCINDEX_ASCII 0
#endif

OnigEncodingDefine(ascii, ASCII) = {
@@ -51,9 +54,9 @@ OnigEncodingDefine(ascii, ASCII) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
onigenc_single_byte_ascii_only_case_map,
ENCINDEX_ASCII,
ONIGENC_FLAG_NONE,
onigenc_single_byte_ascii_only_case_map,
};
ENC_ALIAS("BINARY", "ASCII-8BIT")
ENC_REPLICATE("IBM437", "ASCII-8BIT")


+ 3
- 3
enc/big5.c View File

@@ -300,9 +300,9 @@ OnigEncodingDefine(big5, BIG5) = {
onigenc_not_support_get_ctype_code_range,
big5_left_adjust_char_head,
big5_is_allowed_reverse_match,
onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_ascii_only_case_map,
};

/*
@@ -335,9 +335,9 @@ OnigEncodingDefine(big5_hkscs, BIG5_HKSCS) = {
onigenc_not_support_get_ctype_code_range,
big5_left_adjust_char_head,
big5_is_allowed_reverse_match,
onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_ascii_only_case_map,
};
ENC_ALIAS("Big5-HKSCS:2008", "Big5-HKSCS")

@@ -370,7 +370,7 @@ OnigEncodingDefine(big5_uao, BIG5_UAO) = {
onigenc_not_support_get_ctype_code_range,
big5_left_adjust_char_head,
big5_is_allowed_reverse_match,
onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_ascii_only_case_map,
};

+ 1
- 1
enc/cp949.c View File

@@ -211,9 +211,9 @@ OnigEncodingDefine(cp949, CP949) = {
onigenc_not_support_get_ctype_code_range,
cp949_left_adjust_char_head,
cp949_is_allowed_reverse_match,
onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_ascii_only_case_map,
};
/*
* Name: CP949


+ 2
- 2
enc/emacs_mule.c View File

@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*/

#include "regint.h"
#include "regenc.h"


#define emacsmule_islead(c) ((UChar )(c) < 0x9e)
@@ -334,9 +334,9 @@ OnigEncodingDefine(emacs_mule, Emacs_Mule) = {
onigenc_not_support_get_ctype_code_range,
left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_ascii_only_case_map,
};

ENC_REPLICATE("stateless-ISO-2022-JP", "Emacs-Mule")

+ 2
- 2
enc/euc_jp.c View File

@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*/

#include "regint.h"
#include "regenc.h"

#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)

@@ -576,9 +576,9 @@ OnigEncodingDefine(euc_jp, EUC_JP) = {
get_ctype_code_range,
left_adjust_char_head,
is_allowed_reverse_match,
onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_ascii_only_case_map,
};
/*
* Name: EUC-JP


+ 26
- 1
enc/euc_kr.c View File

@@ -188,8 +188,33 @@ OnigEncodingDefine(euc_kr, EUC_KR) = {
onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
euckr_is_allowed_reverse_match,
onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_ascii_only_case_map,
};
ENC_ALIAS("eucKR", "EUC-KR")

#ifndef RUBY
/* Same with OnigEncodingEUC_KR except the name */
OnigEncodingDefine(euc_cn, EUC_CN) = {
euckr_mbc_enc_len,
"EUC-CN", /* name */
2, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
euckr_mbc_to_code,
onigenc_mb2_code_to_mbclen,
euckr_code_to_mbc,
euckr_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
euckr_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
euckr_is_allowed_reverse_match,
onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
};
#endif /* RUBY */

+ 1
- 1
enc/euc_tw.c View File

@@ -221,8 +221,8 @@ OnigEncodingDefine(euc_tw, EUC_TW) = {
onigenc_not_support_get_ctype_code_range,
euctw_left_adjust_char_head,
euctw_is_allowed_reverse_match,
onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_ascii_only_case_map,
};
ENC_ALIAS("eucTW", "EUC-TW")

+ 1
- 2
enc/gb18030.c View File

@@ -597,8 +597,7 @@ OnigEncodingDefine(gb18030, GB18030) = {
onigenc_not_support_get_ctype_code_range,
gb18030_left_adjust_char_head,
gb18030_is_allowed_reverse_match,
onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_ascii_only_case_map,
};


+ 1
- 1
enc/gbk.c View File

@@ -211,9 +211,9 @@ OnigEncodingDefine(gbk, GBK) = {
onigenc_not_support_get_ctype_code_range,
gbk_left_adjust_char_head,
gbk_is_allowed_reverse_match,
onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_ascii_only_case_map,
};
/*
* Name: GBK


+ 17
- 16
enc/iso_8859_1.c View File

@@ -256,45 +256,46 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSE
}

static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if (code==SHARP_s) {
if (flags&ONIGENC_CASE_UPCASE) {
if (code == SHARP_s) {
if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
else if (flags&ONIGENC_CASE_FOLD) {
else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncISO_8859_1_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code += 0x20;
}
else if (code==0xAA || code==0xBA || code==0xB5 || code==0xFF) ;
else if ((EncISO_8859_1_CtypeTable[code]&BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
else if (code == 0xAA || code == 0xBA || code == 0xB5 || code == 0xFF)
;
else if ((EncISO_8859_1_CtypeTable[code] & BIT_CTYPE_LOWER)
&& (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
code -= 0x20;
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(iso_8859_1, ISO_8859_1) = {
@@ -314,8 +315,8 @@ OnigEncodingDefine(iso_8859_1, ISO_8859_1) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
ENC_ALIAS("ISO8859-1", "ISO-8859-1")

+ 21
- 20
enc/iso_8859_10.c View File

@@ -215,9 +215,9 @@ apply_all_case_fold(OnigCaseFoldType flag,

static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -225,48 +225,49 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}

static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if (code==SHARP_s) {
if (flags&ONIGENC_CASE_UPCASE) {
if (code == SHARP_s) {
if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
else if (flags&ONIGENC_CASE_FOLD) {
else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if (code==0xBD || code==0xFF) ;
else if (code == 0xBD || code == 0xFF)
;
else if ((EncISO_8859_10_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_10_TO_LOWER_CASE(code);
}
else if ((EncISO_8859_10_CtypeTable[code]&BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
else if ((EncISO_8859_10_CtypeTable[code] & BIT_CTYPE_LOWER)
&& (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
if (code>=0xA0 && code<=0xBF)
if (code >= 0xA0 && code <= 0xBF)
code -= 0x10;
else
code -= 0x20;
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(iso_8859_10, ISO_8859_10) = {
@@ -286,8 +287,8 @@ OnigEncodingDefine(iso_8859_10, ISO_8859_10) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
ENC_ALIAS("ISO8859-10", "ISO-8859-10")

+ 1
- 1
enc/iso_8859_11.c View File

@@ -93,9 +93,9 @@ OnigEncodingDefine(iso_8859_11, ISO_8859_11) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
onigenc_single_byte_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_single_byte_ascii_only_case_map,
};
ENC_ALIAS("ISO8859-11", "ISO-8859-11")



+ 21
- 20
enc/iso_8859_13.c View File

@@ -208,9 +208,9 @@ apply_all_case_fold(OnigCaseFoldType flag,

static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -218,38 +218,39 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}

static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if (code==SHARP_s) {
if (flags&ONIGENC_CASE_UPCASE) {
if (code == SHARP_s) {
if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
else if (flags&ONIGENC_CASE_FOLD) {
else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncISO_8859_13_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_13_TO_LOWER_CASE(code);
}
else if (code==0xB5) ;
else if ((EncISO_8859_13_CtypeTable[code]&BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
else if (code == 0xB5)
;
else if ((EncISO_8859_13_CtypeTable[code] & BIT_CTYPE_LOWER)
&& (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
if (code==0xB8 || code==0xBA || code==0xBF) {
if (code == 0xB8 || code == 0xBA || code == 0xBF) {
code -= 0x10;
}
else {
@@ -257,11 +258,11 @@ case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
}
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(iso_8859_13, ISO_8859_13) = {
@@ -281,8 +282,8 @@ OnigEncodingDefine(iso_8859_13, ISO_8859_13) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
ENC_ALIAS("ISO8859-13", "ISO-8859-13")

+ 25
- 25
enc/iso_8859_14.c View File

@@ -217,9 +217,9 @@ apply_all_case_fold(OnigCaseFoldType flag,

static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -227,58 +227,58 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}

static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if (code==SHARP_s) {
if (flags&ONIGENC_CASE_UPCASE) {
if (code == SHARP_s) {
if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
else if (flags&ONIGENC_CASE_FOLD) {
else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
/* else if (code==0xAA || code==0xBA) ; */
/* else if (code == 0xAA || code == 0xBA) ; */
else if ((EncISO_8859_14_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_14_TO_LOWER_CASE(code);
}
else if ((EncISO_8859_14_CtypeTable[code]&BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
else if ((EncISO_8859_14_CtypeTable[code] & BIT_CTYPE_LOWER)
&& (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
if(code == 0xA2 || code == 0xA5 || code == 0xB1 || code == 0xB3 || code == 0xB5 || code == 0xBE)
if (code == 0xA2 || code == 0xA5 || code == 0xB1 || code == 0xB3 || code == 0xB5 || code == 0xBE)
code -= 0x1;
else if(code == 0xAB)
else if (code == 0xAB)
code -= 0x5;
else if(code == 0xFF)
else if (code == 0xFF)
code -= 0x50;
else if(code == 0xB9)
else if (code == 0xB9)
code -= 0x2;
else if(code == 0xBF)
else if (code == 0xBF)
code -= 0x4;
else if(code == 0xB8 || code == 0xBA || code == 0xBC)
else if (code == 0xB8 || code == 0xBA || code == 0xBC)
code -= 0x10;
else
code -= 0x20;
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(iso_8859_14, ISO_8859_14) = {
@@ -298,8 +298,8 @@ OnigEncodingDefine(iso_8859_14, ISO_8859_14) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
ENC_ALIAS("ISO8859-14", "ISO-8859-14")

+ 24
- 23
enc/iso_8859_15.c View File

@@ -211,9 +211,9 @@ apply_all_case_fold(OnigCaseFoldType flag,

static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -221,54 +221,55 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}

static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if (code==SHARP_s) {
if (flags&ONIGENC_CASE_UPCASE) {
if (code == SHARP_s) {
if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
else if (flags&ONIGENC_CASE_FOLD) {
else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if (code==0xAA || code==0xBA || code==0xB5) ;
else if (code == 0xAA || code == 0xBA || code == 0xB5)
;
else if ((EncISO_8859_15_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_15_TO_LOWER_CASE(code);
}
else if ((EncISO_8859_15_CtypeTable[code]&BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
else if ((EncISO_8859_15_CtypeTable[code] & BIT_CTYPE_LOWER)
&& (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
if (code==0xA8)
if (code == 0xA8)
code -= 2;
else if (code==0xB8)
else if (code == 0xB8)
code -= 4;
else if (code==0xBD)
else if (code == 0xBD)
code -= 1;
else if (code==0xFF)
else if (code == 0xFF)
code -= 0x41;
else
code -= 0x20;
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(iso_8859_15, ISO_8859_15) = {
@@ -288,8 +289,8 @@ OnigEncodingDefine(iso_8859_15, ISO_8859_15) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
ENC_ALIAS("ISO8859-15", "ISO-8859-15")

+ 23
- 23
enc/iso_8859_16.c View File

@@ -213,9 +213,9 @@ apply_all_case_fold(OnigCaseFoldType flag,

static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -223,57 +223,57 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}

static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if (code==SHARP_s) {
if (flags&ONIGENC_CASE_UPCASE) {
if (code == SHARP_s) {
if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
else if (flags&ONIGENC_CASE_FOLD) {
else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncISO_8859_16_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_16_TO_LOWER_CASE(code);
}
else if ((EncISO_8859_16_CtypeTable[code]&BIT_CTYPE_LOWER)
else if ((EncISO_8859_16_CtypeTable[code] & BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
if (code==0xA2 || code==0xBD)
if (code == 0xA2 || code == 0xBD)
code--;
else if (code==0xB3 || code==0xBA || code==0xBF)
else if (code == 0xB3 || code == 0xBA || code == 0xBF)
code -= 0x10;
else if (code==0xA8 || code==0xAE)
else if (code == 0xA8 || code == 0xAE)
code -= 0x02;
else if (code==0xB9)
else if (code == 0xB9)
code -= 0x07;
else if (code==0xB8)
else if (code == 0xB8)
code -= 0x04;
else if (code==0xFF)
else if (code == 0xFF)
code -= 0x41;
else
code -= 0x20;
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(iso_8859_16, ISO_8859_16) = {
@@ -293,8 +293,8 @@ OnigEncodingDefine(iso_8859_16, ISO_8859_16) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
ENC_ALIAS("ISO8859-16", "ISO-8859-16")

+ 17
- 17
enc/iso_8859_2.c View File

@@ -221,50 +221,50 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSE
}

static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if (code==SHARP_s) {
if (flags&ONIGENC_CASE_UPCASE) {
if (code == SHARP_s) {
if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
else if (flags&ONIGENC_CASE_FOLD) {
else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncISO_8859_2_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_2_TO_LOWER_CASE(code);
}
else if ((EncISO_8859_2_CtypeTable[code]&BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
if (code>=0xB1 && code<=0xBF){
else if ((EncISO_8859_2_CtypeTable[code] & BIT_CTYPE_LOWER)
&& (flags & ONIGENC_CASE_UPCASE)) {
if (code >= 0xB1 && code <= 0xBF) {
flags |= ONIGENC_CASE_MODIFIED;
code -= 0x10;
}
else{
else {
flags |= ONIGENC_CASE_MODIFIED;
code -= 0x20;
}
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(iso_8859_2, ISO_8859_2) = {
@@ -284,8 +284,8 @@ OnigEncodingDefine(iso_8859_2, ISO_8859_2) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
ENC_ALIAS("ISO8859-2", "ISO-8859-2")

+ 22
- 21
enc/iso_8859_3.c View File

@@ -223,45 +223,46 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
#define DOTLESS_i (0xB9)
#define I_WITH_DOT_ABOVE (0xA9)
static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if (code==SHARP_s) {
if (flags&ONIGENC_CASE_UPCASE) {
if (code == SHARP_s) {
if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
else if (flags&ONIGENC_CASE_FOLD) {
else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if (code==0xB5) ;
else if (code == 0xB5)
;
else if ((EncISO_8859_3_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
if (code=='I')
code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
if (code == 'I')
code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
else
code = ENC_ISO_8859_3_TO_LOWER_CASE(code);
}
else if ((EncISO_8859_3_CtypeTable[code]&BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
&& (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
if (code=='i')
code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
else if (code==DOTLESS_i)
if (code == 'i')
code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
else if (code == DOTLESS_i)
code = 'I';
else if (code>=0xB0 && code<=0xBF ) {
else if (code >= 0xB0 && code <= 0xBF) {
code -= 0x10;
}
else {
@@ -269,11 +270,11 @@ case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
}
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(iso_8859_3, ISO_8859_3) = {
@@ -293,8 +294,8 @@ OnigEncodingDefine(iso_8859_3, ISO_8859_3) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
ENC_ALIAS("ISO8859-3", "ISO-8859-3")

+ 15
- 14
enc/iso_8859_4.c View File

@@ -232,31 +232,32 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if (code==SHARP_s) {
if (flags&ONIGENC_CASE_UPCASE) {
if (code == SHARP_s) {
if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
else if (flags&ONIGENC_CASE_FOLD) {
else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncISO_8859_4_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_4_TO_LOWER_CASE(code);
}
else if (code==0xA2) ;
else if (code == 0xA2)
;
else if ((EncISO_8859_4_CtypeTable[code]&BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
&& (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
if (code>=0xA0&&code<=0xBF) {
if (code==0xBF)
if (code >= 0xA0 && code <= 0xBF) {
if (code == 0xBF)
code -= 0x02;
else
code -= 0x10;
@@ -265,11 +266,11 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
code -= 0x20;
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(iso_8859_4, ISO_8859_4) = {
@@ -289,8 +290,8 @@ OnigEncodingDefine(iso_8859_4, ISO_8859_4) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
ENC_ALIAS("ISO8859-4", "ISO-8859-4")

+ 11
- 11
enc/iso_8859_5.c View File

@@ -210,35 +210,35 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}

static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if ((EncISO_8859_5_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_5_TO_LOWER_CASE(code);
}
else if ((EncISO_8859_5_CtypeTable[code]&BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
&& (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
if (0xF1<=code && code<=0xFF)
if (0xF1 <= code && code <= 0xFF)
code -= 0x50;
else
code -= 0x20;
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(iso_8859_5, ISO_8859_5) = {
@@ -258,8 +258,8 @@ OnigEncodingDefine(iso_8859_5, ISO_8859_5) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
ENC_ALIAS("ISO8859-5", "ISO-8859-5")

+ 1
- 1
enc/iso_8859_6.c View File

@@ -93,9 +93,9 @@ OnigEncodingDefine(iso_8859_6, ISO_8859_6) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
onigenc_single_byte_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_single_byte_ascii_only_case_map,
};
ENC_ALIAS("ISO8859-6", "ISO-8859-6")



+ 24
- 24
enc/iso_8859_7.c View File

@@ -206,58 +206,58 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}

static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if (code==0xF2) {
if (flags&ONIGENC_CASE_UPCASE) {
if (code == 0xF2) {
if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
code = 0xD3;
}
else if (flags&ONIGENC_CASE_FOLD) {
else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
code = 0xF3;
}
}
else if ((EncISO_8859_7_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_7_TO_LOWER_CASE(code);
}
else if (code==0xC0 || code==0xE0)
;
else if (code == 0xC0 || code == 0xE0)
;
else if ((EncISO_8859_7_CtypeTable[code]&BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
&& (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
if (code==0xDC) {
code-=0x26;
if (code == 0xDC) {
code -= 0x26;
}
else if (code>=0xDD && code<=0xDF) {
code-=0x25;
else if (code >= 0xDD && code <= 0xDF) {
code -= 0x25;
}
else if (code==0xFC) {
code-=0x40;
else if (code == 0xFC) {
code -= 0x40;
}
else if (code==0xFD || code==0xFE) {
code-=0x3F;
else if (code == 0xFD || code == 0xFE) {
code -= 0x3F;
}
else {
code-=0x20;
code -= 0x20;
}
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(iso_8859_7, ISO_8859_7) = {
@@ -277,8 +277,8 @@ OnigEncodingDefine(iso_8859_7, ISO_8859_7) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
ENC_ALIAS("ISO8859-7", "ISO-8859-7")

+ 1
- 1
enc/iso_8859_8.c View File

@@ -93,9 +93,9 @@ OnigEncodingDefine(iso_8859_8, ISO_8859_8) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
onigenc_single_byte_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_single_byte_ascii_only_case_map,
};
ENC_ALIAS("ISO8859-8", "ISO-8859-8")



+ 24
- 23
enc/iso_8859_9.c View File

@@ -204,9 +204,9 @@ apply_all_case_fold(OnigCaseFoldType flag,

static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[],
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -216,53 +216,54 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
#define DOTLESS_i (0xFD)
#define I_WITH_DOT_ABOVE (0xDD)
static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if (code==SHARP_s) {
if (flags&ONIGENC_CASE_UPCASE) {
if (code == SHARP_s) {
if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
else if (flags&ONIGENC_CASE_FOLD) {
else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if (code==0xAA || code==0xB5 || code==0xBA || code==0xFF) ;
else if (code == 0xAA || code == 0xB5 || code == 0xBA || code == 0xFF)
;
else if ((EncISO_8859_9_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
if (code=='I')
code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
if (code == 'I')
code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
else
code = ENC_ISO_8859_9_TO_LOWER_CASE(code);
}
else if ((EncISO_8859_9_CtypeTable[code]&BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
&& (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
if (code=='i')
code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
else if (code==DOTLESS_i)
if (code == 'i')
code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
else if (code == DOTLESS_i)
code = 'I';
else
code -= 0x20;
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(iso_8859_9, ISO_8859_9) = {
@@ -282,8 +283,8 @@ OnigEncodingDefine(iso_8859_9, ISO_8859_9) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
ENC_ALIAS("ISO8859-9", "ISO-8859-9")

+ 1
- 2
enc/koi8_r.c View File

@@ -214,9 +214,8 @@ OnigEncodingDefine(koi8_r, KOI8_R) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
onigenc_single_byte_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_single_byte_ascii_only_case_map,
};
ENC_ALIAS("CP878", "KOI8-R")


+ 1
- 1
enc/koi8_u.c View File

@@ -218,7 +218,7 @@ OnigEncodingDefine(koi8_u, KOI8_U) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
onigenc_single_byte_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_single_byte_ascii_only_case_map,
};

+ 32
- 10
enc/mktable.c View File

@@ -2,7 +2,7 @@
mktable.c
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,10 @@
#include <stdio.h>
#include <locale.h>

#ifndef __USE_ISOC99
#define __USE_ISOC99
#endif

#include <ctype.h>

#include "regenc.h"
@@ -1108,11 +1111,13 @@ static int exec(FILE* fp, ENC_INFO* einfo)
#define NCOL 8

int c, val, enc;
int r;

enc = einfo->num;

fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n",
einfo->name);
r = fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n",
einfo->name);
if (r < 0) return -1;

for (c = 0; c < 256; c++) {
val = 0;
@@ -1131,20 +1136,33 @@ static int exec(FILE* fp, ENC_INFO* einfo)
if (IsWord (enc, c)) val |= BIT_CTYPE_WORD;
if (IsAscii (enc, c)) val |= BIT_CTYPE_ASCII;

if (c % NCOL == 0) fputs(" ", fp);
fprintf(fp, "0x%04x", val);
if (c != 255) fputs(",", fp);
if (c % NCOL == 0) {
r = fputs(" ", fp);
if (r < 0) return -1;
}
r = fprintf(fp, "0x%04x", val);
if (r < 0) return -1;

if (c != 255) {
r = fputs(",", fp);
if (r < 0) return -1;
}
if (c != 0 && c % NCOL == (NCOL-1))
fputs("\n", fp);
r = fputs("\n", fp);
else
fputs(" ", fp);
r = fputs(" ", fp);

if (r < 0) return -1;
}
fprintf(fp, "};\n");
r = fprintf(fp, "};\n");
if (r < 0) return -1;

return 0;
}

extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED)
{
int r;
int i;
FILE* fp = stdout;

@@ -1155,7 +1173,11 @@ extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED)
/* setlocale(LC_ALL, "fr_FR.iso88591"); */

for (i = 0; i < (int )(sizeof(Info)/sizeof(ENC_INFO)); i++) {
exec(fp, &Info[i]);
r = exec(fp, &Info[i]);
if (r < 0) {
fprintf(stderr, "FAIL exec(): %d\n", r);
return -1;
}
}

return 0;


+ 2
- 2
enc/shift_jis.c View File

@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*/

#include "regint.h"
#include "regenc.h"

static const int EncLen_SJIS[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -563,9 +563,9 @@ OnigEncodingDefine(shift_jis, Shift_JIS) = {
get_ctype_code_range,
left_adjust_char_head,
is_allowed_reverse_match,
onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
onigenc_ascii_only_case_map,
};
/*
* Name: Shift_JIS


+ 125
- 123
enc/unicode.c View File

@@ -139,17 +139,17 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)

/* macros related to ONIGENC_CASE flags */
/* defined here because not used in other files */
#define ONIGENC_CASE_SPECIALS (ONIGENC_CASE_TITLECASE|ONIGENC_CASE_IS_TITLECASE|ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL)
#define ONIGENC_CASE_SPECIALS (ONIGENC_CASE_TITLECASE | ONIGENC_CASE_IS_TITLECASE | ONIGENC_CASE_UP_SPECIAL | ONIGENC_CASE_DOWN_SPECIAL)

/* macros for length in CaseMappingSpecials array in enc/unicode/casefold.h */
#define SpecialsLengthOffset 25 /* needs to be higher than the 22 bits used for Unicode codepoints */
#define SpecialsLengthExtract(n) ((n)>>SpecialsLengthOffset)
#define SpecialsCodepointExtract(n) ((n)&((1<<SpecialsLengthOffset)-1))
#define SpecialsLengthEncode(n) ((n)<<SpecialsLengthOffset)
#define SpecialsLengthExtract(n) ((n) >> SpecialsLengthOffset)
#define SpecialsCodepointExtract(n) ((n) & ((1 << SpecialsLengthOffset) - 1))
#define SpecialsLengthEncode(n) ((n) << SpecialsLengthOffset)

#define OnigSpecialIndexMask (((1<<OnigSpecialIndexWidth)-1)<<OnigSpecialIndexShift)
#define OnigSpecialIndexEncode(n) ((n)<<OnigSpecialIndexShift)
#define OnigSpecialIndexDecode(n) (((n)&OnigSpecialIndexMask)>>OnigSpecialIndexShift)
#define OnigSpecialIndexMask (((1 << OnigSpecialIndexWidth) - 1) << OnigSpecialIndexShift)
#define OnigSpecialIndexEncode(n) ((n) << OnigSpecialIndexShift)
#define OnigSpecialIndexDecode(n) (((n) & OnigSpecialIndexMask) >> OnigSpecialIndexShift)

/* macros to shorten "enc/unicode/casefold.h", undefined immediately after including the file */
#define U ONIGENC_CASE_UPCASE
@@ -660,128 +660,130 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
int codepoint_length;

to_end -= CASE_MAPPING_SLACK;
/* copy flags ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE over to
* ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */
flags |= (flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))<<ONIGENC_CASE_SPECIAL_OFFSET;

while (*pp<end && to<=to_end) {
codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
if (codepoint_length < 0)
return codepoint_length; /* encoding invalid */
code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
*pp += codepoint_length;

if (code<='z') { /* ASCII comes first */
if (code>='a' && code<='z') {
if (flags&ONIGENC_CASE_UPCASE) {
MODIFIED;
if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code=='i')
code = I_WITH_DOT_ABOVE;
else
code += 'A'-'a';
}
}
else if (code>='A' && code<='Z') {
if (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)) {
MODIFIED;
if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code=='I')
code = DOTLESS_i;
else
code += 'a'-'A';
}
}
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
int codepoint_length;

to_end -= CASE_MAPPING_SLACK;
/* copy flags ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE over to
* ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */
flags |= (flags & (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) << ONIGENC_CASE_SPECIAL_OFFSET;

while (*pp < end && to <= to_end) {
codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
if (codepoint_length < 0)
return codepoint_length; /* encoding invalid */
code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
*pp += codepoint_length;

if (code <= 'z') { /* ASCII comes first */
if (code >= 'a' && code <= 'z') {
if (flags & ONIGENC_CASE_UPCASE) {
MODIFIED;
if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'i')
code = I_WITH_DOT_ABOVE;
else
code += 'A' - 'a';
}
else if (!(flags&ONIGENC_CASE_ASCII_ONLY) && code>=0x00B5) { /* deal with non-ASCII; micron sign (U+00B5) is lowest affected */
const CodePointList3 *folded;

if (code==I_WITH_DOT_ABOVE) {
if (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)) {
MODIFIED;
code = 'i';
if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI)) { /* make dot above explicit */
to += ONIGENC_CODE_TO_MBC(enc, code, to);
code = DOT_ABOVE;
}
}
}
else if (code==DOTLESS_i) { /* handle this manually, because it isn't involved in folding */
if (flags&ONIGENC_CASE_UPCASE)
MODIFIED, code = 'I';
}
else if (code >= 'A' && code <= 'Z') {
if (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD)) {
MODIFIED;
if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'I')
code = DOTLESS_i;
else
code += 'a' - 'A';
}
}
}
else if (!(flags & ONIGENC_CASE_ASCII_ONLY) && code >= 0x00B5) { /* deal with non-ASCII; micron sign (U+00B5) is lowest affected */
const CodePointList3 *folded;

if (code == I_WITH_DOT_ABOVE) {
if (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD)) {
MODIFIED;
code = 'i';
if (!(flags & ONIGENC_CASE_FOLD_TURKISH_AZERI)) { /* make dot above explicit */
to += ONIGENC_CODE_TO_MBC(enc, code, to);
code = DOT_ABOVE;
}
}
}
else if (code == DOTLESS_i) { /* handle this manually, because it isn't involved in folding */
if (flags & ONIGENC_CASE_UPCASE) {
MODIFIED;
code = 'I';
}
}
else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */
if ((flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, */
&& (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */
/* already Titlecase, no changes needed */
}
else if (flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
const OnigCodePoint *next;
int count;

MODIFIED;
if (flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_SPECIALS) { /* special */
const OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n);

if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE) { /* swapCASE available */
if ((flags & (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE))
== (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) /* swapCASE needed */
goto SpecialsCopy;
else /* swapCASE not needed */
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
}
else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */
if ((flags&ONIGENC_CASE_TITLECASE) /* Titlecase needed, */
&& (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */
/* already Titlecase, no changes needed */
}
else if (flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
const OnigCodePoint *next;
int count;

MODIFIED;
if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_SPECIALS) { /* special */
const OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n);

if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_IS_TITLECASE) { /* swapCASE available */
if ((flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))
== (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) /* swapCASE needed */
goto SpecialsCopy;
else /* swapCASE not needed */
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
}
if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) { /* Titlecase available */
if (flags&ONIGENC_CASE_TITLECASE) /* Titlecase needed, but not yet Titlecase */
goto SpecialsCopy;
else /* Titlecase not needed */
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
}
if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_DOWN_SPECIAL) {
if (!(flags&ONIGENC_CASE_DOWN_SPECIAL))
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
}
/* here, we know we use ONIGENC_CASE_UP_SPECIAL, and the position is right */
SpecialsCopy:
count = SpecialsLengthExtract(*SpecialsStart);
next = SpecialsStart;
code = SpecialsCodepointExtract(*next++);
}
else { /* no specials */
count = OnigCodePointCount(folded->n);
next = folded->code;
code = *next++;
}
if (count==1)
;
else if (count==2) {
to += ONIGENC_CODE_TO_MBC(enc, code, to);
code = *next;
}
else { /* count == 3 */
to += ONIGENC_CODE_TO_MBC(enc, code, to);
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
code = *next;
}
}
if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) { /* Titlecase available */
if (flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, but not yet Titlecase */
goto SpecialsCopy;
else /* Titlecase not needed */
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
}
else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0 /* data about character found in CaseUnfold_11_Table */
&& flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
MODIFIED;
code = folded->code[(flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) ? 1 : 0];
if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_DOWN_SPECIAL) {
if (!(flags & ONIGENC_CASE_DOWN_SPECIAL))
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
}
/* here, we know we use ONIGENC_CASE_UP_SPECIAL, and the position is right */
SpecialsCopy:
count = SpecialsLengthExtract(*SpecialsStart);
next = SpecialsStart;
code = SpecialsCodepointExtract(*next++);
}
else { /* no specials */
count = OnigCodePointCount(folded->n);
next = folded->code;
code = *next++;
}
if (count == 1)
;
else if (count == 2) {
to += ONIGENC_CODE_TO_MBC(enc, code, to);
code = *next;
}
else { /* count == 3 */
to += ONIGENC_CODE_TO_MBC(enc, code, to);
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
code = *next;
}
}
to += ONIGENC_CODE_TO_MBC(enc, code, to);
/* switch from titlecase to lowercase for capitalize */
if (flags & ONIGENC_CASE_TITLECASE)
flags ^= (ONIGENC_CASE_UPCASE |ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE|
ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL);
}
else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0 /* data about character found in CaseUnfold_11_Table */
&& flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
MODIFIED;
code = folded->code[(flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) ? 1 : 0];
}
}
*flagP = flags;
return (int)(to-to_start);
to += ONIGENC_CODE_TO_MBC(enc, code, to);
/* switch from titlecase to lowercase for capitalize */
if (flags & ONIGENC_CASE_TITLECASE)
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE |
ONIGENC_CASE_UP_SPECIAL | ONIGENC_CASE_DOWN_SPECIAL);
}
*flagP = flags;
return (int )(to - to_start);
}

#if 0


+ 6
- 3
enc/us_ascii.c View File

@@ -1,7 +1,10 @@
#include "regenc.h"
#include "encindex.h"
#ifdef RUBY
# include "encindex.h"
#endif

#ifndef ENCINDEX_US_ASCII
#define ENCINDEX_US_ASCII 0
# define ENCINDEX_US_ASCII 0
#endif

static int
@@ -29,9 +32,9 @@ OnigEncodingDefine(us_ascii, US_ASCII) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
onigenc_single_byte_ascii_only_case_map,
ENCINDEX_US_ASCII,
ONIGENC_FLAG_NONE,
onigenc_single_byte_ascii_only_case_map,
};
ENC_ALIAS("ASCII", "US-ASCII")
ENC_ALIAS("ANSI_X3.4-1968", "US-ASCII")


+ 1
- 1
enc/utf_16be.c View File

@@ -249,8 +249,8 @@ OnigEncodingDefine(utf_16be, UTF_16BE) = {
onigenc_utf16_32_get_ctype_code_range,
utf16be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
onigenc_unicode_case_map,
0,
ONIGENC_FLAG_UNICODE,
onigenc_unicode_case_map,
};
ENC_ALIAS("UCS-2BE", "UTF-16BE")

+ 1
- 1
enc/utf_16le.c View File

@@ -242,7 +242,7 @@ OnigEncodingDefine(utf_16le, UTF_16LE) = {
onigenc_utf16_32_get_ctype_code_range,
utf16le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
onigenc_unicode_case_map,
0,
ONIGENC_FLAG_UNICODE,
onigenc_unicode_case_map,
};

+ 1
- 2
enc/utf_32be.c View File

@@ -187,9 +187,8 @@ OnigEncodingDefine(utf_32be, UTF_32BE) = {
onigenc_utf16_32_get_ctype_code_range,
utf32be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
onigenc_unicode_case_map,
0,
ONIGENC_FLAG_UNICODE,
onigenc_unicode_case_map,
};
ENC_ALIAS("UCS-4BE", "UTF-32BE")


+ 1
- 1
enc/utf_32le.c View File

@@ -187,8 +187,8 @@ OnigEncodingDefine(utf_32le, UTF_32LE) = {
onigenc_utf16_32_get_ctype_code_range,
utf32le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
onigenc_unicode_case_map,
0,
ONIGENC_FLAG_UNICODE,
onigenc_unicode_case_map,
};
ENC_ALIAS("UCS-4LE", "UTF-32LE")

+ 8
- 6
enc/utf_8.c View File

@@ -28,17 +28,20 @@
*/

#include "regenc.h"
#include "encindex.h"
#ifdef RUBY
# include "encindex.h"
#endif

#ifndef ENCINDEX_UTF_8
#define ENCINDEX_UTF_8 0
# define ENCINDEX_UTF_8 0
#endif

#define USE_INVALID_CODE_SCHEME

#ifdef USE_INVALID_CODE_SCHEME
/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
#define INVALID_CODE_FE 0xfffffffe
#define INVALID_CODE_FF 0xffffffff
# define INVALID_CODE_FE 0xfffffffe
# define INVALID_CODE_FF 0xffffffff
#endif
#define VALID_CODE_LIMIT 0x0010ffff

@@ -428,9 +431,9 @@ OnigEncodingDefine(utf_8, UTF_8) = {
get_ctype_code_range,
left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
onigenc_unicode_case_map,
ENCINDEX_UTF_8,
ONIGENC_FLAG_UNICODE,
onigenc_unicode_case_map,
};
ENC_ALIAS("CP65001", "UTF-8")

@@ -444,4 +447,3 @@ ENC_ALIAS("CP65001", "UTF-8")
ENC_REPLICATE("UTF8-MAC", "UTF-8")
ENC_ALIAS("UTF-8-MAC", "UTF8-MAC")
ENC_ALIAS("UTF-8-HFS", "UTF8-MAC") /* Emacs 23.2 */


+ 18
- 17
enc/windows_1250.c View File

@@ -191,40 +191,41 @@ cp1250_get_case_fold_codes_by_str(OnigCaseFoldType flag,
}

static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;

while (*pp<end && to<to_end) {
while (*pp < end && to < to_end) {
code = *(*pp)++;
if (code==SHARP_s) {
if (flags&ONIGENC_CASE_UPCASE) {
if (code == SHARP_s) {
if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
else if (flags&ONIGENC_CASE_FOLD) {
else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncCP1250_CtypeTable[code] & BIT_CTYPE_UPPER)
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_CP1250_TO_LOWER_CASE(code);
}
else if (code==0xB5) ;
else if (code == 0xB5)
;
else if ((EncCP1250_CtypeTable[code]&BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
&& (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
if (code==0xB9)
if (code == 0xB9)
code = 0xA5;
else if (code==0xBE)
else if (code == 0xBE)
code = 0xBC;
else if (code >= 0x8A && code <= 0xBF && code!=0xB9)
code -= 0x10;
@@ -232,11 +233,11 @@ case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
code -= 0x20;
}
*to++ = code;
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
return (int)(to-to_start);
return (int )(to - to_start);
}

OnigEncodingDefine(windows_1250, Windows_1250) = {
@@ -256,9 +257,9 @@ OnigEncodingDefine(windows_1250, Windows_1250) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
case_map,
0,
ONIGENC_FLAG_NONE,
case_map,
};
/*
* Name: windows-1250


+ 18
- 17
enc/windows_1251.c View File

@@ -181,49 +181,50 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
}

static int
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,