// unicode_linebreak/shared.rs

/// Unicode line breaking class.
///
/// The enum is `#[repr(u8)]` with implicit discriminants, so the numeric value of a variant is
/// its position in this declaration (`Mandatory` = 0 through `Unknown` = 42). Do not reorder,
/// insert or remove variants without auditing every place that relies on those values; the
/// `eot` / `sot` constants in this file continue the numbering at 43 and 44.
///
/// Each variant's documentation starts with the short alias it is imported under by the `use`
/// declaration that follows the enum.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
#[repr(u8)]
pub enum BreakClass {
    // Non-tailorable
    /// `BK`: Cause a line break (after)
    Mandatory,
    /// `CR`: Cause a line break (after), except between CR and LF
    CarriageReturn,
    /// `LF`: Cause a line break (after)
    LineFeed,
    /// `CM`: Prohibit a line break between the character and the preceding character
    CombiningMark,
    /// `NL`: Cause a line break (after)
    NextLine,
    /// `SG`: Do not occur in well-formed text
    Surrogate,
    /// `WJ`: Prohibit line breaks before and after
    WordJoiner,
    /// `ZW`: Provide a break opportunity
    ZeroWidthSpace,
    /// `GL`: Prohibit line breaks before and after
    NonBreakingGlue,
    /// `SP`: Enable indirect line breaks
    Space,
    /// `ZWJ`: Prohibit line breaks within joiner sequences
    ZeroWidthJoiner,
    // Break opportunities
    /// `B2`: Provide a line break opportunity before and after the character
    BeforeAndAfter,
    /// `BA`: Generally provide a line break opportunity after the character
    After,
    /// `BB`: Generally provide a line break opportunity before the character
    Before,
    /// `HY`: Provide a line break opportunity after the character, except in numeric context
    Hyphen,
    /// `CB`: Provide a line break opportunity contingent on additional information
    Contingent,
    // Characters prohibiting certain breaks
    /// `CL`: Prohibit line breaks before
    ClosePunctuation,
    /// `CP`: Prohibit line breaks before
    CloseParenthesis,
    /// `EX`: Prohibit line breaks before
    Exclamation,
    /// `IN`: Allow only indirect line breaks between pairs
    Inseparable,
    /// `NS`: Allow only indirect line breaks before
    NonStarter,
    /// `OP`: Prohibit line breaks after
    OpenPunctuation,
    /// `QU`: Act like they are both opening and closing
    Quotation,
    // Numeric context
    /// `IS`: Prevent breaks after any and before numeric
    InfixSeparator,
    /// `NU`: Form numeric expressions for line breaking purposes
    Numeric,
    /// `PO`: Do not break following a numeric expression
    Postfix,
    /// `PR`: Do not break in front of a numeric expression
    Prefix,
    /// `SY`: Prevent a break before, and allow a break after
    Symbol,
    // Other characters
    /// `AI`: Act like AL when the resolved EAW is N; otherwise, act as ID
    Ambiguous,
    /// `AL`: Are alphabetic characters or symbols that are used with alphabetic characters
    Alphabetic,
    /// `CJ`: Treat as NS or ID for strict or normal breaking.
    ConditionalJapaneseStarter,
    /// `EB`: Do not break from following Emoji Modifier
    EmojiBase,
    /// `EM`: Do not break from preceding Emoji Base
    EmojiModifier,
    /// `H2`: Form Korean syllable blocks
    HangulLvSyllable,
    /// `H3`: Form Korean syllable blocks
    HangulLvtSyllable,
    /// `HL`: Do not break around a following hyphen; otherwise act as Alphabetic
    HebrewLetter,
    /// `ID`: Break before or after, except in some numeric context
    Ideographic,
    /// `JL`: Form Korean syllable blocks
    HangulLJamo,
    /// `JV`: Form Korean syllable blocks
    HangulVJamo,
    /// `JT`: Form Korean syllable blocks
    HangulTJamo,
    /// `RI`: Keep pairs together. For pairs, break before and after other classes
    RegionalIndicator,
    /// `SA`: Provide a line break opportunity contingent on additional, language-specific
    /// context analysis
    ComplexContext,
    /// `XX`: Have as yet unknown line breaking behavior or unassigned code positions
    Unknown,
}

// Short aliases for every variant. They look like the class abbreviations used by UAX #14
// (`BK`, `CR`, `LF`, ...) -- NOTE(review): confirm against the spec when adding a class.
use BreakClass::{
    After as BA, Alphabetic as AL, Ambiguous as AI, Before as BB, BeforeAndAfter as B2,
    CarriageReturn as CR, CloseParenthesis as CP, ClosePunctuation as CL, CombiningMark as CM,
    ComplexContext as SA, ConditionalJapaneseStarter as CJ, Contingent as CB, EmojiBase as EB,
    EmojiModifier as EM, Exclamation as EX, HangulLJamo as JL, HangulLvSyllable as H2,
    HangulLvtSyllable as H3, HangulTJamo as JT, HangulVJamo as JV, HebrewLetter as HL,
    Hyphen as HY, Ideographic as ID, InfixSeparator as IS, Inseparable as IN, LineFeed as LF,
    Mandatory as BK, NextLine as NL, NonBreakingGlue as GL, NonStarter as NS, Numeric as NU,
    OpenPunctuation as OP, Postfix as PO, Prefix as PR, Quotation as QU, RegionalIndicator as RI,
    Space as SP, Surrogate as SG, Symbol as SY, Unknown as XX, WordJoiner as WJ,
    ZeroWidthJoiner as ZWJ, ZeroWidthSpace as ZW,
};

/// Ceiling (exclusive) for code points in the Basic Multilingual Plane (BMP).
const BMP_LIMIT: u32 = 0x10000;

/// Shift size for getting index-3 table offset.
const SHIFT_3: u32 = 4;
/// Shift size for getting index-2 table offset (evaluates to 9).
const SHIFT_2: u32 = 5 + SHIFT_3;
/// Shift size for getting index-1 table offset (evaluates to 14).
const SHIFT_1: u32 = 5 + SHIFT_2;
/// Shift size for getting BMP block start.
const BMP_SHIFT: u32 = 6;

// Block lengths derived from the shift sizes above. Going by the names, each is the number of
// entries in one block of the corresponding table; the tables themselves are not defined here.
/// `1 << (SHIFT_1 - SHIFT_2)`, i.e. 32.
const INDEX_2_BLOCK_LENGTH: u32 = 1 << (SHIFT_1 - SHIFT_2);
/// `1 << (SHIFT_2 - SHIFT_3)`, i.e. 32.
const INDEX_3_BLOCK_LENGTH: u32 = 1 << (SHIFT_2 - SHIFT_3);
/// `1 << SHIFT_3`, i.e. 16.
const SMALL_DATA_BLOCK_LENGTH: u32 = 1 << SHIFT_3;
/// `1 << BMP_SHIFT`, i.e. 64.
const BMP_DATA_BLOCK_LENGTH: u32 = 1 << BMP_SHIFT;

// Single-bit flags occupying the two highest bits of a `u8`.
// NOTE(review): presumably OR-ed into per-state bytes of a break table -- confirm at use sites.
/// Flag bit (bit 7) marking an allowed break.
const ALLOWED_BREAK_BIT: u8 = 0x80;
/// Flag bit (bit 6) marking a mandatory break.
const MANDATORY_BREAK_BIT: u8 = 0x40;

/// Pseudo-class value one past the last `BreakClass` discriminant (`Unknown` = 42).
/// NOTE(review): presumably "end of text" -- confirm at use sites.
// The lower-case name is intentional, hence the lint allowance.
#[allow(non_upper_case_globals)]
const eot: u8 = 43;
/// Pseudo-class value following `eot`.
/// NOTE(review): presumably "start of text" -- confirm at use sites.
// The lower-case name is intentional, hence the lint allowance.
#[allow(non_upper_case_globals)]
const sot: u8 = 44;