(** Functor taking a configuration module [Config] of signature
    [ConfigInt.Type] and returning a module of signature [Type].
    NOTE(review): the declarations listed after this line are presumably
    the contents of [Type] -- confirm against the interface file. *)
module Make: functor (Config : ConfigInt.Type) -> Type
Character Information
(** Unicode general category of a character.  Each tag is the two-letter
    category abbreviation; the trailing comment on each tag gives the
    major class and subclass it stands for. *)
type general_category_type = [ `Cc (* Other, Control *)
| `Cf (* Other, Format *)
| `Cn (* Other, Not Assigned *)
| `Co (* Other, Private Use *)
| `Cs (* Other, Surrogate *)
| `Ll (* Letter, Lowercase *)
| `Lm (* Letter, Modifier *)
| `Lo (* Letter, Other *)
| `Lt (* Letter, Titlecase *)
| `Lu (* Letter, Uppercase *)
| `Mc (* Mark, Spacing Combining *)
| `Me (* Mark, Enclosing *)
| `Mn (* Mark, Non-Spacing *)
| `Nd (* Number, Decimal Digit *)
| `Nl (* Number, Letter *)
| `No (* Number, Other *)
| `Pc (* Punctuation, Connector *)
| `Pd (* Punctuation, Dash *)
| `Pe (* Punctuation, Close *)
| `Pf (* Punctuation, Final *)
| `Pi (* Punctuation, Initial *)
| `Po (* Punctuation, Other *)
| `Ps (* Punctuation, Open *)
| `Sc (* Symbol, Currency *)
| `Sk (* Symbol, Modifier *)
| `Sm (* Symbol, Math *)
| `So (* Symbol, Other *)
| `Zl (* Separator, Line *)
| `Zp (* Separator, Paragraph *)
| `Zs ] (* Separator, Space *)
Type of Unicode general character categories.
Each variant specifies the following category:
`Lu : Letter, Uppercase
`Ll : Letter, Lowercase
`Lt : Letter, Titlecase
`Mn : Mark, Non-Spacing
`Mc : Mark, Spacing Combining
`Me : Mark, Enclosing
`Nd : Number, Decimal Digit
`Nl : Number, Letter
`No : Number, Other
`Zs : Separator, Space
`Zl : Separator, Line
`Zp : Separator, Paragraph
`Cc : Other, Control
`Cf : Other, Format
`Cs : Other, Surrogate
`Co : Other, Private Use
`Cn : Other, Not Assigned
`Lm : Letter, Modifier
`Lo : Letter, Other
`Pc : Punctuation, Connector
`Pd : Punctuation, Dash
`Ps : Punctuation, Open
`Pe : Punctuation, Close
`Pi : Punctuation, Initial
`Pf : Punctuation, Final
`Po : Punctuation, Other
`Sm : Symbol, Math
`Sc : Symbol, Currency
`Sk : Symbol, Modifier
`So : Symbol, Other
(** [general_category u] gives the [general_category_type] tag of the
    character [u]. *)
val general_category : UChar.t -> general_category_type
(** [load_general_category_map ()] returns a [UMap.t] whose values are
    [general_category_type] tags.
    NOTE(review): presumably the character-to-category map that backs
    [general_category] -- confirm. *)
val load_general_category_map : unit -> general_category_type UMap.t
(** Character properties, one tag per property.  A tag selects which
    table or set the [load_property_*] functions load; the [*_by_name]
    variants take the tag name as a string without the leading backquote.
    NOTE(review): [`Other_default_Ignorable_Code_Point] is spelled with a
    lowercase d, unlike [`Default_Ignorable_Code_Point].  The spelling is
    part of the public interface (and of the by-name lookup strings), so
    it is left untouched here -- confirm whether it is intentional. *)
type character_property_type = [ `Alphabetic
| `Ascii_Hex_Digit
| `Bidi_Control
| `Default_Ignorable_Code_Point
| `Deprecated
| `Diacritic
| `Extender
| `Grapheme_Base
| `Grapheme_Extend
| `Grapheme_Link
| `Hex_Digit
| `Hyphen
| `IDS_Binary_Operator
| `IDS_Trinary_Operator
| `ID_Continue
| `ID_Start
| `Ideographic
| `Logical_Order_Exception
| `Lowercase
| `Math
| `Noncharacter_Code_Point
| `Other_Alphabetic
| `Other_Grapheme_Extend
| `Other_Lowercase
| `Other_Math
| `Other_Uppercase
| `Other_default_Ignorable_Code_Point
| `Quotation_Mark
| `Radical
| `Soft_Dotted
| `Terminal_Punctuation
| `Unified_Ideograph
| `Uppercase
| `White_Space
| `XID_Continue
| `XID_Start ]
Type of character properties
(** [load_property_tbl p] loads the boolean character table
    ([UCharTbl.Bool.t]) for the property [p].
    NOTE(review): presumably the table answers whether a character has
    property [p] -- confirm. *)
val load_property_tbl : character_property_type -> UCharTbl.Bool.t
Load the table for the given character type.
(** [load_property_tbl_by_name name] loads the boolean character table
    for the property whose tag name, written without the leading
    backquote, is [name].
    NOTE(review): the behaviour for a [name] that matches no property tag
    is not stated in this listing -- confirm. *)
val load_property_tbl_by_name : string -> UCharTbl.Bool.t
Load the table for the given name of the character type.
The name is obtained by removing the leading ` from
the polymorphic variant tag.
(** [load_property_set p] loads the set of characters ([USet.t]) for the
    property [p]. *)
val load_property_set : character_property_type -> USet.t
Load the set of characters of the given character type.
(** [load_property_set_by_name name] loads the set of characters for the
    property whose tag name, written without the leading backquote, is
    [name].
    NOTE(review): the behaviour for a [name] that matches no property tag
    is not stated in this listing -- confirm. *)
val load_property_set_by_name : string -> USet.t
Load the set of characters of the given name of the character type.
The name is obtained by removing the leading ` from
the polymorphic variant tag.
(** Scripts, one tag per script name.  This is the result type of
    [script] and the value type of the map returned by
    [load_script_map].
    NOTE(review): [`Common] and [`Inherited] are presumably the Unicode
    pseudo-scripts for characters shared between scripts and for
    characters that take the script of their base -- confirm. *)
type script_type = [ `Arabic
| `Armenian
| `Bengali
| `Bopomofo
| `Buhid
| `Canadian_Aboriginal
| `Cherokee
| `Common
| `Cyrillic
| `Deseret
| `Devanagari
| `Ethiopic
| `Georgian
| `Gothic
| `Greek
| `Gujarati
| `Gurmukhi
| `Han
| `Hangul
| `Hanunoo
| `Hebrew
| `Hiragana
| `Inherited
| `Kannada
| `Katakana
| `Khmer
| `Lao
| `Latin
| `Malayalam
| `Mongolian
| `Myanmar
| `Ogham
| `Old_Italic
| `Oriya
| `Runic
| `Sinhala
| `Syriac
| `Tagalog
| `Tagbanwa
| `Tamil
| `Telugu
| `Thaana
| `Thai
| `Tibetan
| `Yi ]
Type of scripts.
(** [script u] gives the [script_type] tag of the character [u]. *)
val script : UChar.t -> script_type
(** [load_script_map ()] returns a [UMap.t] whose values are
    [script_type] tags.
    NOTE(review): presumably the character-to-script map that backs
    [script] -- confirm. *)
val load_script_map : unit -> script_type UMap.t
Casing
(** Case-mapping table loaders.  Each takes [()] and returns a
    [UChar.t UCharTbl.t], i.e. a character table whose entries are single
    characters.
    NOTE(review): presumably these are the one-to-one lowercase,
    uppercase and titlecase mappings respectively, and the 1 in the names
    refers to the single-character result -- confirm. *)
val load_to_lower1_tbl : unit -> UChar.t UCharTbl.t
val load_to_upper1_tbl : unit -> UChar.t UCharTbl.t
val load_to_title1_tbl : unit -> UChar.t UCharTbl.t
(** Conditions for case mapping.  [`Locale] carries a string, and [`Not]
    wraps another [casemap_condition], so the type is recursive.
    NOTE(review): the tag names look like the context conditions of the
    Unicode SpecialCasing data (Final_Sigma, After_Soft_Dotted,
    More_Above, Before_Dot, plus a locale identifier and negation) --
    confirm. *)
type casemap_condition = [ `AfterSoftDotted
| `BeforeDot
| `FinalSigma
| `Locale of string
| `MoreAbove
| `Not of casemap_condition ]
(** Element type of the lists stored in the table returned by
    [load_conditional_casing_tbl].
    NOTE(review): no fields are listed here, and a record type without
    fields is not valid OCaml.  The field list was presumably dropped
    when this listing was generated -- restore it from the original
    interface file before relying on this declaration. *)
type special_casing_property = {
}
(** [load_conditional_casing_tbl ()] returns a character table whose
    entries are lists of [special_casing_property] records. *)
val load_conditional_casing_tbl : unit -> special_casing_property list UCharTbl.t
(** [load_casefolding_tbl ()] returns a character table whose entries are
    lists of characters.
    NOTE(review): presumably each entry is the case-folded form of the
    character, which may be more than one character long -- confirm. *)
val load_casefolding_tbl : unit -> UChar.t list UCharTbl.t
(** [combined_class u] is the combined class of the character [u], an
    integer from 0 to 255.
    NOTE(review): this looks like the Unicode Canonical_Combining_Class
    property -- confirm. *)
val combined_class : UChar.t -> int
Combined class
A combined class is an integer in the range 0 -- 255, showing how this
character interacts with other combined characters.
Decomposition
(** Kinds of decomposition.  A value of this type is the first component
    of the [`Composite] case of [decomposition_info].
    NOTE(review): [`Canon] is presumably canonical decomposition and the
    remaining tags presumably mirror the compatibility formatting tags of
    UnicodeData.txt (font, noBreak, initial, medial, final, isolated,
    circle, super, sub, vertical, wide, narrow, small, square, fraction,
    compat) -- confirm. *)
type decomposition_type = [ `Canon
| `Circle
| `Compat
| `Final
| `Font
| `Fraction
| `Initial
| `Isolated
| `Medial
| `Narrow
| `NoBreak
| `Small
| `Square
| `Sub
| `Super
| `Vertical
| `Wide ]
Types of decomposition.
(** Decomposition data stored per character in the table returned by
    [load_decomposition_tbl].  [`Composite (kind, chars)] pairs a
    [decomposition_type] with a list of characters; [`Canonform] and
    [`HangulSyllable] carry no data.
    NOTE(review): presumably [`Canonform] marks a character with no
    decomposition and [`HangulSyllable] marks one that is decomposed
    algorithmically rather than by table -- confirm. *)
type decomposition_info = [ `Canonform
| `Composite of decomposition_type * UChar.t list
| `HangulSyllable ]
(** [load_decomposition_tbl ()] returns a character table whose entries
    are [decomposition_info] values. *)
val load_decomposition_tbl : unit -> decomposition_info UCharTbl.t
Canonical Composition
(** [load_composition_tbl ()] returns a character table whose entry for a
    character [u] is a list of pairs [(v, w)]: [u] followed by [v]
    canonically composes to [w].  The entry is the empty list when [u] is
    a Hangul jamo. *)
val load_composition_tbl : unit -> (UChar.t * UChar.t) list UCharTbl.t
The return value [(u_1, u'_1); ... (u_n, u'_n)] means that,
for the given character u, the sequence u u_i forms
the canonical composition u'_i.
If u is a Hangul jamo, composition returns [].
(** [load_composition_exclusion_tbl ()] returns a boolean character table
    ([UCharTbl.Bool.t]) about the use of composed characters in NFC and
    NFKC.
    NOTE(review): the polarity is ambiguous -- the name suggests [true]
    means excluded from composition, while the description reads as
    [true] means used.  Confirm against the implementation before
    relying on it. *)
val load_composition_exclusion_tbl : unit -> UCharTbl.Bool.t
Whether the given composed character is used in NFC or NFKC