;;; -*- mode: emacs-lisp; coding: utf-8; buffer-file-coding-system: utf-8; coding-system-for-read: utf-8; coding-system-for-write: utf-8; -*- ; ;------------------------------------------------------------------ (unless (boundp 'ql-html-debug-p) ; if symbol is mot yet defined (setq ql-html-debug-p 't) ) (when ql-html-debug-p (if load-in-progress (message "Loading \"%s\"..." load-file-name) (message "This buffer contains file \"%s\"." (buffer-file-name)) ) ) (require 'ql-request-user-char-f "ql-request-user-char.el") ;------------------------------------------------------------------ ; ; NOTA BENE! ; ~~~~~~~~~~ ; All functions here operate within Emacs own character coding set, ; i.e. character codepoints are those of Emacs rather than UCS. ; So do we need to (encode-char ... 'ucs) here (e.g. for our tables lookups)? ; Probably not as all our tables, alists and arrays use Emacs' own codes. ; (insert (format "&#%d;" (encode-char the-char 'ucs))) ; ;------------------------------------------------------------------ ;------------------------------------------------------------------ ; ; Use hash table for entites association instead of alist. ; (defconst ql-html-entity-hash-table #s(hash-table ; This is hash table of HTML entities indexed by characters. size 499 ; 443 449 457 461 463 467 479 487 491 499 503 509 521 weakness nil test eql data ( ; 0-31 'nil ; Control characters. Do NOT mangle with them! ; 32 'nil ; " " = Space. No need to replace it. 33 "!" ; "!" = Exclamation mark. ; Does it make sense to encode it? 34 """ ; No need to replace double quotes by HTML code. ; 34 ; == ?\" (double quote as a character) ; #x22 ; == ?\" (double quote as a character) 35 "#" ; "#" = Number sign, hash sign 36 "$" ; "$" = Dollar sign 37 "%" ; "%" = Percent sign 38 "&" ; "&" = Ampersand (&) ; Careless substitution of "&" will ruin HTML! 39 "'" ; "'" = Apostrophe (') ; No need to replace apostrophe by HTML code. 
40 "(" ; "(" = Left parenthesis 41 ")" ; ")" = Right parenthesis 42 "*" ; "*" = Asterisk (star) 43 "+" ; "+" = Plus sign 44 "," ; "," = Comma ; 45 nil ; "-" = Hyphen-minus 46 "." ; "." = Full stop (period) 47 "/" ; "/" = Slash ; 48 'nil ; "0" = Digit. No need to replace it. ; ... ; 57 'nil ; "9" = Digit. No need to replace it. 58 ":" ; ":" = Colon 59 ";" ; ";" = Semicolon 60 "<" ; "<" = Less-than sign ; Careless substitution of "<" will ruin HTML! 61 "=" ; "=" = Equals sign 62 ">" ; ">" = Greater-than sign ; Careless substitution of ">" will ruin HTML! 63 "?" ; "?" = Question mark 64 "@" ; "@" = At sign, commercial at ; 65 'nil ; "A" = Latin letter. No need to replace it. ; ... ; 90 'nil ; "Z" = Latin letter. No need to replace it. 91 "[" ; "[" = Left square bracket (also [) 92 "\" ; "\" = Backslash 93 "]" ; "]" = Right square bracket (also ]) 94 "^" ; "^" = Caret 95 "_" ; "_" = Underscore (also _) 96 "`" ; "`" = Backtick ; 97 'nil ; "a" = Latin letter. No need to replace it. ; ... ; 122 'nil ; "z" = Latin letter. No need to replace it. 123 "{" ; "{" = Left curly bracket (also {) 124 "|" ; "|" = Vetical bar (also |, |) 125 "}" ; "}" = Right curly bracket (also }) 126 "˜" ; "~" = Tilde ; 127 'nil ; "" = Delete 'Ctrl-?' 
; ------ ASCII (127) END ------ ASCII (127) END ------ ; ; (string-equal "\u00a0" " ") = t #x00a0 " " ; " " ; non-breaking space #x00a1 "¡" ; "¡" ; inverted exclamation mark #x00a2 "¢" ; "¢" ; cent sign #x00a3 "£" ; "£" ; pound sign (pound sterling sign) #x00a4 "¤" ; "¤" ; currency sign (general currency sign) #x00a5 "¥" ; "¥" ; yen sign #x00a6 "¦" ; "¦" ; broken bar == broken vertical bar ; #x00a6 "&brkbar;" ; "¦" ; broken bar == broken vertical bar #x00a7 "§" ; "§" ; section sign #x00a8 "¨" ; "¨" ; umlaut == dieresis (spacing dieresis) ; #x00a8 "¨" ; "¨" ; spacing dieresis == umlaut #x00a9 "©" ; "©" ; copyright sign #x00aa "ª" ; "ª" ; feminine ordinal indicator #x00ab "„" ; "«" ; Russian quotation style („) ; #x00ab "«" ; "«" ; english quotation style (guillemet) #x00ac "¬" ; "¬" ; not sign #x00ad "­" ; "­" ; soft hyphen #x00ae "®" ; "®" ; registered sign (Trade Mark sign) #x00af "¯" ; "¯" ; macron (spacing macron long accent) ; #x00af "&hibar;" ; "¯" ; spacing macron long accent (macron) #x00b0 "°" ; "°" ; degree sign #x2103 "℃" ; "℃" ; degree Celsius #x00b1 "±" ; "±" ; plus-minus sign #x00b2 "²" ; "²" ; superscript two #x00b3 "³" ; "³" ; superscript three #x00b4 "´" ; "´" ; acute accent (spacing accute accent) #x00b5 "µ" ; "µ" ; micro sign (also mu) #x00b6 "¶" ; "¶" ; pilcrow sign == paragraph sign #x00b7 "·" ; "·" ; middle dot (centered dot) #x00b8 "¸" ; "¸" ; cedilla (spacing cedilla) #x00b9 "¹" ; "¹" ; superscript one #x00ba "º" ; "º" ; masculine ordinal indicator #x00bb "”" ; "»" ; Russian quotation style (”) ; #x00bb "»" ; "»" ; english quotation style (guillemet) #x00bc "¼" ; "¼" ; vulgar fraction one quarter #x00bd "½" ; "½" ; vulgar fraction one half ; #x00bd "½" ; "½" ; vulgar fraction one half #x00be "¾" ; "¾" ; vulgar fraction three quarters #x00bf "¿" ; "¿" ; inverted question mark #x00c0 "À" ; "À" ; Latin capital letter A with grave #x00c1 "Á" ; "Á" ; Latin capital letter A with acute #x00c2 "Â" ; "Â" ; Latin capital letter A with circumflex #x00c3 
"Ã" ; "Ã" ; Latin capital letter A with tilde #x00c4 "Ä" ; "Ä" ; Latin capital letter A with diaeresis (with umlaut) #x00c5 "Å" ; "Å" ; Latin capital letter A with ring above #x00c6 "Æ" ; "Æ" ; Latin capital letter AE (AE ligature) #x00c7 "Ç" ; "Ç" ; Latin capital letter C with cedilla #x00c8 "È" ; "È" ; Latin capital letter E with grave #x00c9 "É" ; "É" ; Latin capital letter E with acute #x00ca "Ê" ; "Ê" ; Latin capital letter E with circumflex #x00cb "Ë" ; "Ë" ; Latin capital letter E with diaeresis (with umlaut) #x00cc "Ì" ; "Ì" ; Latin capital letter I with grave #x00cd "Í" ; "Í" ; Latin capital letter I with acute #x00ce "Î" ; "Î" ; Latin capital letter I with circumflex #x00cf "Ï" ; "Ï" ; Latin capital letter I with diaeresis (with umlaut) #x00d0 "Ð" ; "Ð" ; Latin capital letter ETH #x00d1 "Ñ" ; "Ñ" ; Latin capital letter N with tilde #x00d2 "Ò" ; "Ò" ; Latin capital letter O with grave #x00d3 "Ó" ; "Ó" ; Latin capital letter O with acute #x00d4 "Ô" ; "Ô" ; Latin capital letter O with circumflex #x00d5 "Õ" ; "Õ" ; Latin capital letter O with tilde #x00d6 "Ö" ; "Ö" ; Latin capital letter O with diaeresis (with umlaut) #x00d7 "×" ; "×" ; multiplication sign #x00d8 "Ø" ; "Ø" ; Latin capital letter O with stroke (with slash) #x00d9 "Ù" ; "Ù" ; Latin capital letter U with grave #x00da "Ú" ; "Ú" ; Latin capital letter U with acute #x00db "Û" ; "Û" ; Latin capital letter U with circumflex #x00dc "Ü" ; "Ü" ; Latin capital letter U with diaeresis (with umlaut) #x00dd "Ý" ; "Ý" ; Latin capital letter Y with acute #x00de "Þ" ; "Þ" ; Latin capital letter THORN #x00df "ß" ; "ß" ; Latin small letter sharp s (sz ligature) #x00e0 "à" ; "à" ; Latin small letter a with grave #x00e1 "á" ; "á" ; Latin small letter a with acute #x00e2 "â" ; "â" ; Latin small letter a with circumflex #x00e3 "ã" ; "ã" ; Latin small letter a with tilde #x00e4 "ä" ; "ä" ; Latin small letter a with diaeresis (with umlaut) #x00e5 "å" ; "å" ; Latin small letter a with ring above #x00e6 "æ" ; "æ" ; 
Latin small letter ae (ae ligature) #x00e7 "ç" ; "ç" ; Latin small letter c with cedilla #x00e8 "è" ; "è" ; Latin small letter e with grave #x00e9 "é" ; "é" ; Latin small letter e with acute #x00ea "ê" ; "ê" ; Latin small letter e with circumflex #x00eb "ë" ; "ë" ; Latin small letter e with diaeresis (with umlaut) #x00ec "ì" ; "ì" ; Latin small letter i with grave #x00ed "í" ; "í" ; Latin small letter i with acute #x00ee "î" ; "î" ; Latin small letter i with circumflex #x00ef "ï" ; "ï" ; Latin small letter i with diaeresis (with umlaut) #x00f0 "ð" ; "ð" ; Latin small letter eth #x00f1 "ñ" ; "ñ" ; Latin small letter n with tilde #x00f2 "ò" ; "ò" ; Latin small letter o with grave #x00f3 "ó" ; "ó" ; Latin small letter o with acute #x00f4 "ô" ; "ô" ; Latin small letter o with circumflex #x00f5 "õ" ; "õ" ; Latin small letter o with tilde #x00f6 "ö" ; "ö" ; Latin small letter o with diaeresis (with umlaut) #x00f7 "÷" ; "÷" ; division sign #x00f8 "ø" ; "ø" ; Latin small letter o with stroke (with slash) #x00f9 "ù" ; "ù" ; Latin small letter u with grave #x00fa "ú" ; "ú" ; Latin small letter u with acute #x00fb "û" ; "û" ; Latin small letter u with circumflex #x00fc "ü" ; "ü" ; Latin small letter u with diaeresis (with umlaut) #x00fd "ý" ; "ý" ; Latin small letter y with acute #x00fe "þ" ; "þ" ; Latin small letter thorn #x00ff "ÿ" ; "ÿ" ; Latin small letter y with diaeresis (with umlaut) #x0100 "Ā" ; "Ā" ; Latin capital letter A with macron #x0101 "ā" ; "ā" ; Latin small letter a with macron #x0112 "Ē" ; "Ē" ; Latin capital letter E with macron #x0113 "ē" ; "ē" ; Latin small letter e with macron #x0126 "&Hbar;" ; "Ħ" ; Latin capital letter h with stroke (with bar) #x0127 "ℏ" ; "ħ" ; Latin small letter h with stroke (with bar) (reduced Planck constant: h/2pi == 6,582119569...*10^{-16) eV*s) #x014c "Ō" ; "Ō" ; Latin capital letter O with macron #x014d "ō" ; "ō" ; Latin small letter o with macron #x0152 "Œ" ; "Œ" ; Latin capital ligature oe #x0153 "œ" ; "œ" ; Latin small 
ligature oe #x0160 "Š" ; "Š" ; Latin capital letter s with caron #x0161 "š" ; "š" ; Latin small letter s with caron #x016a "Ū" ; "Ū" ; Latin capital letter U with macron #x016b "ū" ; "ū" ; Latin small letter u with macron #x0178 "Ÿ" ; "Ÿ" ; Latin capital letter y with diaeresis #x0192 "ƒ" ; "ƒ" ; Latin small letter f with hook #xa7ac "Ɡ" ; "Ɡ" ; Latin capital letter script G #x0261 "ɡ" ; "ɡ" ; Latin small letter script G #x1da2 "ᶢ" ; "ᶢ" ; modifier letter small script G #x026a "ɪ" ; "ɪ" ; Latin letter small capital I #xa7ae "Ɪ" ; "Ɪ" ; Latin capital letter small capital I #x0298 "ʘ" ; "ʘ" ; Latin letter bilabial click #x029a "ʚ" ; "ʚ" ; Latin small letter closed open E #x02bf "ʿ" ; "ʿ" ; modifier letter left half ring #x02c6 "ˆ" ; "ˆ" ; modifier letter circumflex accent #x02c8 "ˈ" ; "ˈ" ; modifier letter vertical line #x02d0 "ː" ; "ː" ; modifier letter triangular colon #x02da "˚" ; "˚" ; ring above #x02dc "˜" ; "˜" ; small tilde #x0333 "̳" ; "̳" ; combining double low line ; ; (aka non-spacing double underscore) #x0410 'nil ; "А" ; DO NOT replace Russian letters #x0411 'nil ; "Б" ; DO NOT replace Russian letters #x0412 'nil ; "В" ; DO NOT replace Russian letters #x0413 'nil ; "Г" ; DO NOT replace Russian letters #x0414 'nil ; "Д" ; DO NOT replace Russian letters #x0415 'nil ; "Е" ; DO NOT replace Russian letters #x0401 'nil ; "Ё" ; DO NOT replace Russian letters #x0416 'nil ; "Ж" ; DO NOT replace Russian letters #x0417 'nil ; "З" ; DO NOT replace Russian letters #x0418 'nil ; "И" ; DO NOT replace Russian letters #x0419 'nil ; "Й" ; DO NOT replace Russian letters #x041a 'nil ; "К" ; DO NOT replace Russian letters #x041b 'nil ; "Л" ; DO NOT replace Russian letters #x041c 'nil ; "М" ; DO NOT replace Russian letters #x041d 'nil ; "Н" ; DO NOT replace Russian letters #x041e 'nil ; "О" ; DO NOT replace Russian letters #x041f 'nil ; "П" ; DO NOT replace Russian letters #x0420 'nil ; "Р" ; DO NOT replace Russian letters #x0421 'nil ; "С" ; DO NOT replace Russian letters 
#x0422 'nil ; "Т" ; DO NOT replace Russian letters #x0423 'nil ; "У" ; DO NOT replace Russian letters #x0424 'nil ; "Ф" ; DO NOT replace Russian letters #x0425 'nil ; "Х" ; DO NOT replace Russian letters #x0426 'nil ; "Ц" ; DO NOT replace Russian letters #x0427 'nil ; "Ч" ; DO NOT replace Russian letters #x0428 'nil ; "Ш" ; DO NOT replace Russian letters #x0429 'nil ; "Щ" ; DO NOT replace Russian letters #x042a 'nil ; "Ъ" ; DO NOT replace Russian letters #x042b 'nil ; "Ы" ; DO NOT replace Russian letters #x042c 'nil ; "Ь" ; DO NOT replace Russian letters #x042d 'nil ; "Э" ; DO NOT replace Russian letters #x042e 'nil ; "Ю" ; DO NOT replace Russian letters #x042f 'nil ; "Я" ; DO NOT replace Russian letters #x0430 'nil ; "а" ; DO NOT replace Russian letters #x0431 'nil ; "б" ; DO NOT replace Russian letters #x0432 'nil ; "в" ; DO NOT replace Russian letters #x0433 'nil ; "г" ; DO NOT replace Russian letters #x0434 'nil ; "д" ; DO NOT replace Russian letters #x0435 'nil ; "е" ; DO NOT replace Russian letters #x0451 'nil ; "ё" ; DO NOT replace Russian letters #x0436 'nil ; "ж" ; DO NOT replace Russian letters #x0437 'nil ; "з" ; DO NOT replace Russian letters #x0438 'nil ; "и" ; DO NOT replace Russian letters #x0439 'nil ; "й" ; DO NOT replace Russian letters #x043a 'nil ; "к" ; DO NOT replace Russian letters #x043b 'nil ; "л" ; DO NOT replace Russian letters #x043c 'nil ; "м" ; DO NOT replace Russian letters #x043d 'nil ; "н" ; DO NOT replace Russian letters #x043e 'nil ; "о" ; DO NOT replace Russian letters #x043f 'nil ; "п" ; DO NOT replace Russian letters #x0440 'nil ; "р" ; DO NOT replace Russian letters #x0441 'nil ; "с" ; DO NOT replace Russian letters #x0442 'nil ; "т" ; DO NOT replace Russian letters #x0443 'nil ; "у" ; DO NOT replace Russian letters #x0444 'nil ; "ф" ; DO NOT replace Russian letters #x0445 'nil ; "х" ; DO NOT replace Russian letters #x0446 'nil ; "ц" ; DO NOT replace Russian letters #x0447 'nil ; "ч" ; DO NOT replace Russian letters #x0448 
'nil ; "ш" ; DO NOT replace Russian letters #x0449 'nil ; "щ" ; DO NOT replace Russian letters #x044a 'nil ; "ъ" ; DO NOT replace Russian letters #x044b 'nil ; "ы" ; DO NOT replace Russian letters #x044c 'nil ; "ь" ; DO NOT replace Russian letters #x044d 'nil ; "э" ; DO NOT replace Russian letters #x044e 'nil ; "ю" ; DO NOT replace Russian letters #x044f 'nil ; "я" ; DO NOT replace Russian letters #x0768 "̀" ; "̀" ; combining grave accent #x0769 "́" ; "́" ; combining acute accent #x0771 "̃" ; "̃" ; combining tilde ; ̃ #x0778 "̊" ; "̊" ; combining ring above #x0785 "̑" ; "̑" ; combining inverted breve #x0815 "̯" ; "̯" ; combining inverted breve below #x0787 "̓" ; "̓" ; combining comma above ; ̓ #x0486 "҆" ; "҆" ; combining Cyrillic psili pneumata #x1fbf "᾿" ; "᾿" ; Greek psili #x0391 "Α" ; "Α" ; Greek capital letter Alpha #x1f08 "Ἀ" ; "Ἀ" ; Greek capital letter Alpha with psili #x1f0c "Ἄ" ; "Ἄ" ; Greek capital letter Alpha with psili and oxia #x1f0d "Ἅ" ; "Ἅ" ; Greek capital letter Alpha with dasia and oxia #x1fba "Ὰ" ; "Ὰ" ; Greek capital letter Alpha with varia #x0392 "Β" ; "Β" ; Greek capital letter beta #x0393 "Γ" ; "Γ" ; Greek capital letter gamma #x0394 "Δ" ; "Δ" ; Greek capital letter delta #x0395 "Ε" ; "Ε" ; Greek capital letter epsilon #x0396 "Ζ" ; "Ζ" ; Greek capital letter zeta #x0397 "Η" ; "Η" ; Greek capital letter eta #x0398 "Θ" ; "Θ" ; Greek capital letter theta #x0399 "Ι" ; "Ι" ; Greek capital letter iota #x039a "Κ" ; "Κ" ; Greek capital letter kappa #x039b "Λ" ; "Λ" ; Greek capital letter lamda #x039c "Μ" ; "Μ" ; Greek capital letter mu #x039d "Ν" ; "Ν" ; Greek capital letter nu #x039e "Ξ" ; "Ξ" ; Greek capital letter xi #x039f "Ο" ; "Ο" ; Greek capital letter omicron #x03a0 "Π" ; "Π" ; Greek capital letter pi #x03a1 "Ρ" ; "Ρ" ; Greek capital letter rho #x03a3 "Σ" ; "Σ" ; Greek capital letter sigma #x03a4 "Τ" ; "Τ" ; Greek capital letter tau #x03a5 "Υ" ; "Υ" ; Greek capital letter upsilon #x03a6 "Φ" ; "Φ" ; Greek capital letter phi #x03a7 "Χ" ; "Χ" 
; Greek capital letter chi #x03a8 "Ψ" ; "Ψ" ; Greek capital letter psi #x03a9 "Ω" ; "Ω" ; Greek capital letter omega #x038f "Ώ" ; "Ώ" ; Greek capital letter Omega with tonos ;(string #x3b1) ; => small Greek letter alpha ;(string 945) ; => small Greek letter alpha #x03b1 "α" ; "α" ; Greek small letter alpha #x03ac "ά" ; "ά" ; Greek small letter alpha with tonos #x1f00 "ἀ" ; "ἀ" ; Greek small letter alpha with psili ; ἀ #x1f04 "ἄ" ; "ἄ" ; Greek small letter alpha with psili and oxia #x1f05 "ἅ" ; "ἅ" ; Greek small letter alpha with dasia and oxia #x1f70 "ὰ" ; "ὰ" ; Greek small letter alpha with varia #x03b2 "β" ; "β" ; Greek small letter beta #x03b3 "γ" ; "γ" ; Greek small letter gamma #x03b4 "δ" ; "δ" ; Greek small letter delta #x03b5 "ε" ; "ε" ; Greek small letter epsilon #x03ad "έ" ; "έ" ; Greek small letter epsilon with tonos #x03b6 "ζ" ; "ζ" ; Greek small letter zeta #x03b7 "η" ; "η" ; Greek small letter eta #x03ae "ή" ; "ή" ; Greek small letter eta with tonos #x03b8 "θ" ; "θ" ; Greek small letter theta #x03b9 "ι" ; "ι" ; Greek small letter iota #x03af "ί" ; "ί" ; Greek small letter iota with tonos. 
#x03ba "κ" ; "κ" ; Greek small letter kappa #x03bb "λ" ; "λ" ; Greek small letter lamda #x03bc "μ" ; "μ" ; Greek small letter mu #x03bd "ν" ; "ν" ; Greek small letter nu #x03be "ξ" ; "ξ" ; Greek small letter xi #x03bf "ο" ; "ο" ; Greek small letter omicron #x1f41 "ὁ" ; "ὁ" ; Greek small letter omicron with dasia #x03cc "ό" ; "ό" ; Greek small letter omicron with tonos #x03c0 "π" ; "π" ; Greek small letter pi #x03c1 "ρ" ; "ρ" ; Greek small letter rho #x1fe5 "ῥ" ; "ῥ" ; Greek small letter rho with dasia #x03c2 "ς" ; "ς" ; Greek small letter final sigma #x03c3 "σ" ; "σ" ; Greek small letter sigma #x03c4 "τ" ; "τ" ; Greek small letter tau #x03c5 "υ" ; "υ" ; Greek small letter upsilon #x03cd "ύ" ; "ύ" ; Greek small letter upsilon with tonos #x03c6 "φ" ; "φ" ; Greek small letter phi #x03c7 "χ" ; "χ" ; Greek small letter chi #x03c8 "ψ" ; "ψ" ; Greek small letter psi #x03c9 "ω" ; "ω" ; Greek small letter omega #x03ce "ώ" ; "ώ" ; Greek small letter omega with tonos #x1ff6 "ῶ" ; "ῶ" ; Greek small letter omega with perispomeni #x03d1 "ϑ" ; "ϑ" ; Greek theta symbol #x03d2 "ϒ" ; "ϒ" ; Greek upsilon with hook symbol #x03d6 "ϖ" ; "ϖ" ; Greek pi symbol #x03fe "Ͼ" ; "Ͼ" ; Greek capital dotted lunate sigma symbol #x03ff "Ͽ" ; "Ͽ" ; Greek capital reversed dotted lunate sigma symbol #x0472 "Ѳ" ; "Ѳ" ; Cyrillic capital letter Fita #x0473 "ѳ" ; "ѳ" ; Cyrillic small letter fita #x0485 "҅" ; "҅" ; Combining Cyrillic dasia pneumata #x0524 "Ԥ" ; "Ԥ" ; Cyrillic capital letter pe with descender #x0666 "٦" ; "٦" ; arabic-indic digit six #x1e16 "Ḗ" ; "Ḗ" ; Latin capital letter E with macron and acute #x1e17 "ḗ" ; "ḗ" ; Latin small letter e with macron and acute #x1fc6 "ῆ" ; "ῆ" ; Greek small letter eta with perispomeni #x1ffe "῾" ; "῾" ; Greek dasia #x2002 " " ; " " ; en space #x2003 " " ; " " ; em space #x2009 " " ; " " ; thin space #x200c "‌" ; "‌" ; Zero width non-joiner #x200d "‍" ; "‍" ; Zero width joiner #x200e "‎" ; "‎" ; Left-to-right mark #x200f "‏" ; "‏" ; Right-to-left mark #x2013 
"–" ; "–" ; en dash #x2014 "—" ; "—" ; em dash #x2018 "‘" ; "‘" ; left single quotation mark #x2019 "’" ; "’" ; right single quotation mark #x201a "‚" ; "‚" ; single low-9 quotation mark #x201c "“" ; "“" ; left double quotation mark #x201d "”" ; "”" ; right double quotation mark #x201e "„" ; "„" ; double low-9 quotation mark #x2020 "†" ; "†" ; dagger #x2021 "‡" ; "‡" ; double dagger #x2022 "•" ; "•" ; bullet #x2026 "…" ; "…" ; horizontal ellipsis #x2030 "‰" ; "‰" ; per mille sign #x2032 "′" ; "′" ; prime #x2033 "″" ; "″" ; double prime #x2039 "‹" ; "‹" ; single left-pointing angle quotation mark #x203a "›" ; "›" ; single right-pointing angle quotation mark #x203e "‾" ; "‾" ; overline #x2044 "⁄" ; "⁄" ; fraction slash #x20ac "€" ; "€" ; euro sign #x2111 "ℑ" ; "ℑ" ; black-letter capital i #x2116 "№" ; "№" ; numero sign #x2118 "℘" ; "℘" ; script capital p #x211c "ℜ" ; "ℜ" ; black-letter capital r #x2122 "™" ; "™" ; trade mark sign #x2135 "ℵ" ; "ℵ" ; alef symbol #x2190 "←" ; "←" ; leftwards arrow #x2191 "↑" ; "↑" ; upwards arrow #x2192 "→" ; "→" ; rightwards arrow #x2193 "↓" ; "↓" ; downwards arrow #x2194 "↔" ; "↔" ; left right arrow #x2195 "↕" ; "↕" ; up down arrow #x21af "↯" ; "↯" ; downwards zigzag arrow #x21b5 "↵" ; "↵" ; downwards arrow with corner leftwards #x21d0 "⇐" ; "⇐" ; leftwards double arrow #x21d1 "⇑" ; "⇑" ; upwards double arrow #x21d2 "⇒" ; "⇒" ; rightwards double arrow #x21d3 "⇓" ; "⇓" ; downwards double arrow #x21d4 "⇔" ; "⇔" ; left right double arrow #x21d5 "⇕" ; "⇕" ; up down double arrow #x2200 "∀" ; "∀" ; for all #x2202 "∂" ; "∂" ; partial differential #x2203 "∃" ; "∃" ; there exists #x2204 "∄" ; "∄" ; there does not exist #x2205 "∅" ; "∅" ; empty set #x2206 "∆" ; "∆" ; increment #x2207 "∇" ; "∇" ; nabla #x2208 "∈" ; "∈" ; element of #x2209 "∉" ; "∉" ; not an element of #x220b "∋" ; "∋" ; contains as member #x220f "∏" ; "∏" ; n-ary product #x2211 "∑" ; "∑" ; n-ary summation #x2212 "−" ; "−" ; minus sign #x2217 "∗" ; "∗" ; asterisk operator #x221a 
"√" ; "√" ; square root #x221b "∛" ; "∛" ; cube root #x221c "∜" ; "∜" ; fourth root #x221d "∝" ; "∝" ; proportional to #x221e "∞" ; "∞" ; infinity #x2220 "∠" ; "∠" ; angle #x2227 "∧" ; "∧" ; logical and #x2228 "∨" ; "∨" ; logical or #x2229 "∩" ; "∩" ; intersection #x222a "∪" ; "∪" ; union #x222b "∫" ; "∫" ; integral #x2234 "∴" ; "∴" ; therefore #x223c "∼" ; "∼" ; tilde operator #x2245 "≅" ; "≅" ; approximately equal to #x2248 "≈" ; "≈" ; almost equal to #x2260 "≠" ; "≠" ; not equal to #x2261 "≡" ; "≡" ; identical to #x2264 "≤" ; "≤" ; less-than or equal to #x2265 "≥" ; "≥" ; greater-than or equal to #x2282 "⊂" ; "⊂" ; subset of #x2283 "⊃" ; "⊃" ; superset of #x2284 "⊄" ; "⊄" ; not a subset of #x2285 "⊅" ; "⊅" ; not a superset of #x2286 "⊆" ; "⊆" ; subset of or equal to #x2287 "⊇" ; "⊇" ; superset of or equal to #x2295 "⊕" ; "⊕" ; circled plus #x2296 "⊖" ; "⊖" ; circled minus #x2297 "⊗" ; "⊗" ; circled times #x22a5 "⊥" ; "⊥" ; up tack #x22c5 "⋅" ; "⋅" ; dot operator #x22ee "⋮" ; "⋮" ; vertical ellipsis #x2300 "⌀" ; "⌀" ; diameter sign #x2308 "⌈" ; "⌈" ; left ceiling #x2309 "⌉" ; "⌉" ; right ceiling #x230a "⌊" ; "⌊" ; left floor #x230b "⌋" ; "⌋" ; right floor #x2329 "⟨" ; "〈" ; left-pointing angle bracket #x232a "⟩" ; "〉" ; right-pointing angle bracket #x2588 "█" ; "█" ; full block #x25b2 "▲" ; "▲" ; black up-pointing triangle #x25ba "►" ; "►" ; black right-pointing pointer #x25bc "▼" ; "▼" ; black down-pointing triangle #x25c4 "◄" ; "◄" ; black left-pointing pointer #x25ca "◊" ; "◊" ; lozenge #x25cb "○" ; "○" ; white circle #x25cf "●" ; "●" ; black circle #x25e1 "◡" ; "◡" ; lower half circle #x2605 "★" ; "★" ; black star #x2606 "☆" ; "☆" ; white star #x262d "☭" ; "☭" ; hammer and sickle #x262f "☯" ; "☯" ; yin yang #x263a "☺" ; "☺" ; white smiling face #x263b "☻" ; "☻" ; black smiling face #x2640 "♀" ; "♀" ; female sign #x2642 "♂" ; "♂" ; male sign #x264f "♏" ; "♏" ; scorpius #x2660 "♠" ; "♠" ; black spade suit #x2661 "♡" ; "♡" ; white heart suit #x2662 "♢" ; "♢" ; 
; white diamond suit  (comment continued from the #x2662 entry above)
      #x2663 "&clubs;"   ; "♣" ; black club suit
      ; White spade suit has no named entity.  It used to carry the same
      ; value as #x2662 (white diamond suit), a copy-paste slip.
      #x2664 "&#x2664;"  ; "♤" ; white spade suit
      #x2665 "&hearts;"  ; "♥" ; black heart suit
      #x2666 "&diams;"   ; "♦" ; black diamond suit
      #x2667 "&#x2667;"  ; "♧" ; white club suit
      #x26a1 "&#x26a1;"  ; "⚡" ; high voltage sign
      #x2721 "&#x2721;"  ; "✡" ; star of david
      #x27e8 "&lang;"    ; "⟨" ; mathematical left angle bracket
      #x27e9 "&rang;"    ; "⟩" ; mathematical right angle bracket
    ) ; data end
  )
)
; (print ql-html-entity-hash-table)
; (message "%s" ql-html-entity-hash-table)
; (gethash ?! ql-html-entity-hash-table)      => "&excl;"
; (gethash ?\u27e8 ql-html-entity-hash-table) => "&lang;"
; (gethash #x27e9 ql-html-entity-hash-table)  => "&rang;"
; (hash-table-size ql-html-entity-hash-table)  => 499
; (hash-table-count ql-html-entity-hash-table) => 444

;------------------------------------------------------------------
;;;###autoload
(defconst ql-html-ascii-table ; These attributes are for ASCII symbols only!
  [ ; Array of integers, indexed by the character's numeric value (0..127).
    ;
    ; Meaning of the stored class:
    ;   0 => DO NOT "htmlize" the character.
    ;   1 => HTML meta character (-="&#;...).
    ;        Substitution by the corresponding HTML entity is dangerous
    ;        --- careless replacement can ruin HTML code!
    ;        Always ask the user for confirmation.
    ;   2 => In a function operating on a single symbol (at cursor)
    ;        --- if a symbolic entity for the character exists,
    ;            ask the user whether to htmlize this char;
    ;            otherwise don't (i.e. don't use the &#xxx; form).
    ;        In a function processing a string (or a region)
    ;        --- DO NOT htmlize the character.
    ;   3 => Ask the user whether to htmlize this char.
    ;        If there is no symbolic entity for it, use the numeric form.
    ;   4 => DO htmlize the char (either symbolic entity or &#xxx;).
    ;        This value (4) actually should NEVER be used.
    0 ;   0 = NULL = Null                         ; Ctrl-@
    0 ;   1 = SOH  = Start of Heading             ; Ctrl-A
    0 ;   2 = STX  = Start of Text                ; Ctrl-B
    0 ;   3 = ETX  = End of Text                  ; Ctrl-C
    0 ;   4 = EOT  = End of Transmission          ; Ctrl-D
    0 ;   5 = ENQ  = Enquiry                      ; Ctrl-E
    0 ;   6 = ACK  = Acknowledgement              ; Ctrl-F
    0 ;   7 = BELL = Bell ('\a')                  ; Ctrl-G
    0 ;   8 = BS   = Backspace ('\b')             ; Ctrl-H
    0 ;   9 = HT   = Horizontal Tab ('\t')        ; Ctrl-I
    0 ;  10 = LF   = Line Feed ('\n')             ; Ctrl-J
    0 ;  11 = VT   = Vertical Tab ('\v')          ; Ctrl-K
    0 ;  12 = FF   = Form Feed ('\f')             ; Ctrl-L
    0 ;  13 = CR   = Carriage Return ('\r')       ; Ctrl-M
    0 ;  14 = SO   = Shift Out                    ; Ctrl-N
    0 ;  15 = SI   = Shift In                     ; Ctrl-O
    0 ;  16 = DLE  = Data Link Escape             ; Ctrl-P
    0 ;  17 = DC1  = Device Control 1 (often XON) ; Ctrl-Q
    0 ;  18 = DC2  = Device Control 2             ; Ctrl-R
    0 ;  19 = DC3  = Device Control 3 (often XOFF); Ctrl-S
    0 ;  20 = DC4  = Device Control 4             ; Ctrl-T
    0 ;  21 = NAK  = Negative Acknowledgement     ; Ctrl-U
    0 ;  22 = SYN  = Synchronous Idle             ; Ctrl-V
    0 ;  23 = ETB  = End of Transmission Block    ; Ctrl-W
    0 ;  24 = CAN  = Cancel                       ; Ctrl-X
    0 ;  25 = EM   = End of Medium                ; Ctrl-Y
    0 ;  26 = SUB  = Substitute                   ; Ctrl-Z
    0 ;  27 = ESC  = Escape ('\e')                ; Ctrl-[
    0 ;  28 = FS   = File Separator               ; Ctrl-\
    0 ;  29 = GS   = Group Separator              ; Ctrl-]
    0 ;  30 = RS   = Record Separator             ; Ctrl-^
    0 ;  31 = US   = Unit Separator               ; Ctrl-_
    0 ;  32 = SPC  = Space
    1 ;  33 = ! = Exclamation mark (&excl;)   ; Careless substitution of this will ruin HTML!
    1 ;  34 = " = Double quote (&quot;)       ; Careless substitution of this will ruin HTML!
    1 ;  35 = # = Number sign, hash sign (&num;) ; Careless substitution of this will ruin HTML!
    2 ;  36 = $ = Dollar sign (&dollar;)
    2 ;  37 = % = Percent sign (&percnt;)     ; Careless substitution of this can ruin HTML!
    1 ;  38 = & = Ampersand (&amp;)           ; Careless substitution of this will ruin HTML!
    1 ;  39 = ' = Apostrophe (&apos;)         ; No need to replace apostrophe by HTML code.
    2 ;  40 = ( = Left parenthesis (&lpar;)
    2 ;  41 = ) = Right parenthesis (&rpar;)
    2 ;  42 = * = Asterisk (star) (&ast;)
    2 ;  43 = + = Plus sign (&plus;)
    2 ;  44 = , = Comma (&comma;)
    0 ;  45 = - = Hyphen-minus
    2 ;  46 = . = Full stop (&period;)
    1 ;  47 = / = Slash (&sol;)               ; Careless substitution of this will ruin HTML!
    0 0 0 0 0 0 0 0 0 0 ; 48..57 = 0..9   No need to replace digits by HTML code.
    2 ;  58 = : = Colon (&colon;)
    1 ;  59 = ; = Semicolon (&semi;)          ; Careless substitution of this will ruin HTML!
    1 ;  60 = < = Less-than sign (&lt;)       ; Careless substitution of this will ruin HTML!
    1 ;  61 = = = Equals sign (&equals;)      ; Careless substitution of this will ruin HTML!
    1 ;  62 = > = Greater-than sign (&gt;)    ; Careless substitution of this will ruin HTML!
    2 ;  63 = ? = Question mark (&quest;)
    2 ;  64 = @ = At sign, commercial at (&commat;)
    0 0 0 0 0 0 0 0 0 0 0 0 0 ; 65..77 = A..M   No need to html-encode Latin letters.
    0 0 0 0 0 0 0 0 0 0 0 0 0 ; 78..90 = N..Z   No need to html-encode Latin letters.
    2 ;  91 = [ = Left square bracket (&lsqb;, &lbrack;)
    2 ;  92 = \ = Backslash (&bsol;)
    2 ;  93 = ] = Right square bracket (&rsqb;, &rbrack;)
    2 ;  94 = ^ = Caret (&Hat;)
    2 ;  95 = _ = Underscore (&lowbar;, &UnderBar;)
    2 ;  96 = ` = Backtick (&grave;)
    0 0 0 0 0 0 0 0 0 0 0 0 0 ; 97..109  = a..m  No need to html-encode Latin letters.
    0 0 0 0 0 0 0 0 0 0 0 0 0 ; 110..122 = n..z  No need to html-encode Latin letters.
    2 ; 123 = { = Left curly bracket (&lcub;, &lbrace;)
    2 ; 124 = | = Vertical bar (&verbar;, &vert;, &VerticalLine;)
    2 ; 125 = } = Right curly bracket (&rcub;, &rbrace;)
    2 ; 126 = ~ = Tilde (&tilde;)
    0 ; 127 = DEL = Delete                        ; Ctrl-?
  ]
  "Replacement class of every ASCII character, indexed by character code.
The vector has 128 integer elements.  Class 0 means the character is
never replaced by an HTML entity; class 1 marks HTML meta characters,
whose replacement needs user confirmation; class 2 marks characters
that are offered for replacement only when a single character is being
processed; class 3 means \"ask the user\"; class 4 means \"always
replace\" and is not used by any element.")
; (length ql-html-ascii-table) = 128
; (aref ql-html-ascii-table ?!) = 1
; (aref ql-html-ascii-table ?.) = 2
; (aref ql-html-ascii-table ??) = 2
; (aref ql-html-ascii-table ?s) = 0
; (message "%s" ql-html-ascii-table)

;------------------------------------------------------------------
;
; Decide if the character should be replaced by an HTML entity.
; The operation mode is an integer:
;   0 --- Dummy.  The function returns (nil nil nil).
;   1 --- processing just a single character;
;   2 --- processing a region;
;   3 --- processing a whole buffer.
; Currently mode 3 (whole buffer) is equivalent to mode 2 (a region).
; Return value is nil OR a three-element list (replace-p for-all-p entity):
;   `replace-p' tells if the character should be replaced by entity (bool),
;   `for-all-p' tells if the same should be done for all such characters,
;   `entity'    is the standard HTML entity for this character (string).
; The value of `entity' can turn out to be nil
; if a standard HTML entity for the char hasn't been found
;   -- the caller should check the value and, if it is nil,
;      use the numeric form of HTML entity: `&#xxx;'.
; A return value of nil denotes error status: illegal arguments.
; E.g.:
;   (nil t e) --- Do NOT replace the char here nor any further occurrence
;                 of it, i.e. you may cache this answer and not ask again.
;   (t nil e) --- Replace just this one (here), but if the character is
;                 met again, ask again.
; Of course, `for-all-p' is meaningless in single character operation mode.
;
(defun ql-html-char-encode-status (the-char the-mode)
  "Decide whether THE-CHAR should be replaced by an HTML entity.

THE-MODE is an integer selecting the kind of operation:
  0 -- dummy: the answer is always the list of three nils;
  1 -- a single character is being processed;
  2 -- a region is being processed;
  3 -- a whole buffer is being processed (handled exactly like 2).

Return nil when the arguments are unacceptable (a message says why).
Otherwise return a list of three elements, REPLACE-P FOR-ALL-P ENTITY:
  REPLACE-P  non-nil if this occurrence should be replaced;
  FOR-ALL-P  non-nil if the same answer holds for every further
             occurrence of THE-CHAR, so the caller may cache it;
  ENTITY     the symbolic HTML entity, a string, or nil when none is
             known; the caller then has to use the numeric form.

ASCII characters are classified through `ql-html-ascii-table'; entities
are looked up in `ql-html-entity-hash-table'.  Depending on the class
and THE-MODE the user may be prompted."
  (cond
   ((null the-char)
    (message "The character given is null.")
    nil)
   ((not (characterp the-char))
    (message "The first argument MUST be a character.")
    nil)
   ((null the-mode)
    (message "The mode given is null.")
    nil)
   ((not (integerp the-mode))
    (message "The second argument (mode) MUST be an integer.")
    nil)
   ((= the-mode 0)
    (message "The mode is 0 (dummy), return ('nil 'nil 'nil).")
    ;; A fresh list, so a caller modifying the result cannot damage a
    ;; constant shared between calls.
    (list nil nil nil))
   ((or (< the-mode 0) (> the-mode 3))
    (message "Illegal mode %d." the-mode)
    nil)
   ;; The arguments are acceptable; do the real job.
   ((< the-char #x80)
    (ql-html--ascii-encode-status the-char the-mode))
   (t
    (ql-html--non-ascii-encode-status the-char the-mode))))

(defun ql-html--lookup-entity (the-char)
  "Classify the entry of THE-CHAR in `ql-html-entity-hash-table'.
Return the symbol `absent' when the table has no entry for THE-CHAR,
the stored string when the entry is a non-empty string, and nil for
any other stored value, i.e. for a character that is listed in the
table but has no entity and must never be replaced."
  ;; The table is written in the #s(hash-table ...) read syntax.  Its
  ;; data is read, not evaluated, so an entry spelled 'nil is presumably
  ;; stored as the list (quote nil) rather than as nil.  Treating every
  ;; non-string value alike covers both cases.
  (let ((value (gethash the-char ql-html-entity-hash-table 'absent)))
    (cond ((eq value 'absent) 'absent)
          ((and (stringp value) (> (length value) 0)) value)
          (t nil))))

(defun ql-html--format-prompt (the-char quote-template other-template)
  "Build a confirmation prompt about THE-CHAR.
QUOTE-TEMPLATE is used when THE-CHAR is the double quote, so that the
prompt does not wrap a double quote in double quotes; OTHER-TEMPLATE is
used for every other character.  Either template receives THE-CHAR
twice, once for its %c and once for its %d directive."
  (format (if (eq the-char ?\") quote-template other-template)
          the-char the-char))

(defun ql-html--key-reply-status (reply entity)
  "Turn the key REPLY of the user into a status list ending in ENTITY.
A REPLY of ?y or ?! requests replacement; a REPLY of ?! or ?q makes the
answer valid for all further occurrences."
  (list (or (eq ?y reply) (eq ?! reply))
        (or (eq ?! reply) (eq ?q reply))
        entity))

(defun ql-html--ascii-encode-status (the-char the-mode)
  "Compute the status list for the ASCII character THE-CHAR.
THE-CHAR is below #x80 and THE-MODE is 1, 2 or 3; both have already
been validated by `ql-html-char-encode-status'."
  (let* ((class (aref ql-html-ascii-table the-char))
         (looked-up (ql-html--lookup-entity the-char))
         ;; Callers must only ever see a string or nil as the entity,
         ;; never the lookup marker.
         (entity (and (stringp looked-up) looked-up))
         (single-p (= the-mode 1))
         (bulk-p (or (= the-mode 2) (= the-mode 3))))
    (cond
     ;; Class 0: do NOT replace it here nor anywhere after.
     ((<= class 0)
      (list nil t nil))
     ;; Class 1: HTML meta character.  Replacing it carelessly can ruin
     ;; the HTML code, so in single character mode always ask the user,
     ;; and in region (or whole buffer) mode never replace it.
     ((= class 1)
      (list (and single-p
                 (ql-request-confirmation
                  (ql-html--format-prompt
                   the-char
                   "The `%c' (%d) is a HTML metacharacter. Really replace? (y/n): "
                   "The \"%c\" (%d) is a HTML metacharacter. Really replace? (y/n): ")))
            bulk-p
            entity))
     ;; Class 2: in single character mode ask, but only when a symbolic
     ;; entity exists (the numeric form is not offered); in region mode
     ;; do not replace.
     ((= class 2)
      (list (and single-p
                 entity
                 (let ((user-reply
                        (ql-request-confirmation
                         (ql-html--format-prompt
                          the-char
                          "The `%c' is an ASCII symbol (%d < 128). Really replace? (y/n): "
                          "The \"%c\" is an ASCII symbol (%d < 128). Really replace? (y/n): "))))
                   (when ql-html-debug-p
                     (message "Got from user: %s" user-reply))
                   user-reply))
            bulk-p
            entity))
     ;; Class 3: ask the user; without a symbolic entity the caller is
     ;; expected to use the numeric form.  This body used to sit outside
     ;; of its cond clause and was never run for class 3.
     ((= class 3)
      (let ((user-reply
             (ql-request-key-from-user
              (if single-p
                  '("yY1! \n\r" "nN0qQ\u0004\u00ff")
                '("yY1 \n\r" "nN0\u00ff" "qQ\u0004" "!"))
              (ql-html--format-prompt
               the-char
               "The `%c' is an ASCII symbol (%d < 128). Really replace? (y/n): "
               "The \"%c\" is an ASCII symbol (%d < 128). Really replace? (y/n): "))))
        (when ql-html-debug-p
          (message "Got %s from user" user-reply)) ; debugging
        (ql-html--key-reply-status user-reply entity)))
     ;; Class 4 and above: always replace, whatever the mode.  These
     ;; values actually should never be used.  The entity can be nil!
     (t
      (list t t entity)))))

(defun ql-html--non-ascii-encode-status (the-char the-mode)
  "Compute the status list for the non-ASCII character THE-CHAR.
THE-CHAR is #x80 or above and THE-MODE is 1, 2 or 3; both have already
been validated by `ql-html-char-encode-status'."
  (let ((looked-up (ql-html--lookup-entity the-char)))
    (cond
     ;; A symbolic entity is known: replace everywhere.
     ((stringp looked-up)
      (list t t looked-up))
     ;; Listed in the table without an entity (e.g. the Russian letters):
     ;; this character must NEVER be replaced.
     ((null looked-up)
      (list nil t nil))
     ;; Not in the table at all.  In single character mode the user is
     ;; expected to know what he is doing.
     ((= the-mode 1)
      (list t nil nil))
     ;; Not in the table, processing a region or a whole buffer: ask.
     (t
      (ql-html--key-reply-status
       (ql-request-key-from-user
        '("yY1 \n\r" "!" "nN0\u00ff" "qQ\u0004")
        (format "No symbolic entity for \"%c\". Replace it by numeric enity (&#x%x;)?"
                the-char the-char))
       nil)))))

; Expected results.  The mark ∅ presumably stands for "no user input".
; (ql-html-char-encode-status 'nil "")  ∅ => nil
; (ql-html-char-encode-status 't 1)     ∅ => nil
; (ql-html-char-encode-status "" 1)     ∅ => nil
; (ql-html-char-encode-status '(%) 1)   ∅ => nil
; Less-than sign
; (ql-html-char-encode-status ?< 0)     ∅ => (nil nil nil)
; (ql-html-char-encode-status ?< 1):
;                                       y => (t nil "&lt;")
;                                       n => (nil nil "&lt;")
; (ql-html-char-encode-status ?< 2)     ∅ => (nil t "&lt;")
; (ql-html-char-encode-status ?< 3)     ∅ => (nil t "&lt;")
; Percent sign
; (ql-html-char-encode-status ?% 1)
;                                       y => (t nil "&percnt;")
;                                       n => (nil nil "&percnt;")
; (ql-html-char-encode-status ?% 2)     ∅ => (nil t "&percnt;")
; (ql-html-char-encode-status ?% 3)     ∅ => (nil t "&percnt;")
; Latin small letter A
; (ql-html-char-encode-status ?a 1)     ∅ => (nil t nil)
; (ql-html-char-encode-status ?a 2)     ∅ => (nil t nil)
; (ql-html-char-encode-status ?a 3)     ∅ => (nil t nil)
; Cyrillic small letter ZHe ("ж")
; (ql-html-char-encode-status #x0436 1) ∅ => (nil t nil)
; (ql-html-char-encode-status #x0436 2) ∅ => (nil t nil)
; (ql-html-char-encode-status #x0436 3) ∅ => (nil t nil)
; Em dash ("—") (\TeX{}: ---; HTML: &mdash;)
; (ql-html-char-encode-status #x2014 1) ∅ => (t t "&mdash;")
; (ql-html-char-encode-status #x2014 2) ∅ => (t t "&mdash;")
; (ql-html-char-encode-status #x2014 3) ∅ => (t t "&mdash;")
; Empty set ("∅") (mathematics) (\TeX{}: \varnothing{}, \emptyset{}; HTML: &empty;)
; (ql-html-char-encode-status #x2205 1) ∅ => (t t "&empty;")
; (ql-html-char-encode-status #x2205 2) ∅ => (t t "&empty;")
; (ql-html-char-encode-status #x2205 3) ∅ => (t t "&empty;")
; For all ("∀") (universal quantification) (\TeX{}: \forall{}; HTML: &forall;)
; (ql-html-char-encode-status #x2200 1) ∅ => (t t "&forall;")
; (ql-html-char-encode-status #x2200 2) ∅ => (t t "&forall;")
; (ql-html-char-encode-status #x2200 3) ∅ => (t t "&forall;")
; There exists ("∃") (existential quantification) (\TeX{}: \exists{}; HTML: &exist;)
; (ql-html-char-encode-status #x2203 1) ∅ => (t t "&exist;")
;
; (ql-html-char-encode-status #x2203 2)∅ => ('t 't "∃")
; (ql-html-char-encode-status #x2203 3)∅ => ('t 't "∃")
; Hanunoo letter PA (ᜩ) --- NOT present in the html entities table.
; (ql-html-char-encode-status #x1729 1)∅ => ('t 'nil 'nil)
; (ql-html-char-encode-status #x1729 2)
;   y => ('t 'nil 'nil)
;   n => ('nil 'nil 'nil)
; (ql-html-char-encode-status #x1729 3)
;   y => ('t 'nil 'nil)
;   n => ('nil 'nil 'nil)
;
; (equal '(t nil "%") '(t nil "%")) ; Works!

;------------------------------------------------------------------
;
; Replace character(s) at the current position or in the region (if active)
; by their HTML entities: symbolic (if found) or numeric (&#xxx;).
;

(defun ql-html--entity-text (the-char entity)
  "Return the text that replaces THE-CHAR.
This is ENTITY when it is a non-empty string, otherwise the numeric
form \"&#xHHHH;\" built from THE-CHAR.  Checking for a string (rather
than for nil) keeps non-string values from reaching `insert'."
  (if (and (stringp entity) (not (equal 0 (length entity))))
      entity
    (format "&#x%x;" the-char)))

(defun ql-html--encode-char-at-point ()
  "Process the single character at point (operation mode 1).
Replace it by its entity if `ql-html-char-encode-status' says so,
otherwise just step over it."
  (let ((this-char (char-after)))
    (if (null this-char)
        ; `char-after' returns nil at the end of the accessible region.
        (message "No character at point.")
      (let ((status (ql-html-char-encode-status this-char 1)))
        (if ql-html-debug-p
            (message "status for %c: %S" this-char status))
        (cond
         ((null status)
          ; nil is only returned when the arguments were rejected.
          (lwarn 'emacs :error
                 "Got error from ql-html-char-encode-status for single char %c"
                 this-char))
         ((car status)
          (if ql-html-debug-p (message "replace!"))
          (insert (ql-html--entity-text this-char (car (cdr (cdr status)))))
          (delete-char 1))
         (t
          (forward-char)))))))

(defun ql-html--encode-range (start end operation-mode)
  "Process every character between START and END.
OPERATION-MODE (2 = region, 3 = whole buffer) is passed on to
`ql-html-char-encode-status'.  Statuses are cached per character in a
local alist; the status is requested again only while its FOR-ALL-P
element is nil.  Point is restored afterwards."
  (save-excursion
    (save-restriction
      (let ((range-end (copy-marker end)) ; moves along with the insertions
            (ql-entities nil))            ; alist: (char replace-p for-all-p entity)
        (unwind-protect
            (progn
              (goto-char start)
              (while (< (point) range-end)
                (let* ((i-char (char-after))
                       (cached (assoc i-char ql-entities))
                       (status (cdr cached)))
                  (unless (car (cdr status))
                    ; Either not seen yet, or the earlier answer was for
                    ; that one occurrence only: derive the status again.
                    (setq status
                          (ql-html-char-encode-status i-char operation-mode))
                    (if ql-html-debug-p
                        (message "`%c' (%d) status: %s" i-char i-char status))
                    (cond
                     ((null status)
                      (lwarn 'emacs :error
                             "Got error from ql-html-char-encode-status for char `%c', mode %d"
                             i-char operation-mode))
                     (cached
                      (setcdr cached status))
                     (t
                      (setq ql-entities
                            (cons (cons i-char status) ql-entities)))))
                  (if (car status)
                      (progn
                        (if ql-html-debug-p (message "Replace!"))
                        (insert (ql-html--entity-text
                                 i-char (car (cdr (cdr status)))))
                        (delete-char 1))
                    (forward-char)))))
          ; Detach the marker so it no longer has to be updated on edits.
          (set-marker range-end nil))
        (if ql-html-debug-p (message "ql-entities: %s" ql-entities))))))

;;;###autoload
(defun ql-html-encode (the-command-prefix)
  "Replace character(s) by their HTML entities.
The symbolic entity is used when one is known, otherwise the numeric
form (&#xHHHH;).

If the region is active, process the region.  Otherwise, without a
prefix argument (THE-COMMAND-PREFIX is nil) process only the character
at point; with a prefix argument process the whole buffer.

ASCII characters (below 128) are treated carefully and the user may be
asked for confirmation, because replacing HTML metacharacters can ruin
HTML code."
  (interactive "P") ; "P" gets the prefix arg in raw form.  Does not do I/O.
  (let ((operation-mode (cond ((use-region-p) 2)           ; region
                              ((null the-command-prefix) 1) ; single char
                              (t 3))))                      ; whole buffer
    (cond
     ((equal 1 operation-mode)
      (ql-html--encode-char-at-point))
     ((equal 2 operation-mode)
      (ql-html--encode-range (region-beginning) (region-end) operation-mode))
     (t
      (ql-html--encode-range (point-min) (point-max) operation-mode)))))

; Bind the function to [Ctrl-&] globally.
(global-set-key (kbd "C-&") 'ql-html-encode)
; Bind the function to [Ctrl-&] for html mode only.
; NB: the key must go through `kbd'; the bare string "C-&" is the
; three-key sequence `C', `-', `&' and would make `C' a prefix key.
(add-hook 'html-mode-hook
  (lambda () (local-set-key (kbd "C-&") (quote ql-html-encode)))
)

;------------------------------------------------------------------
;
; Replace a character at the current position by its &#xxx; code (html entity).
;
;;;###autoload
(defun ql-html-encode-char ()
  "Replace the character at point by its HTML entity.
Characters of 128 and above are replaced only when a symbolic entity is
known in `ql-html-entity-hash-table'.  Characters below 128 are handled
according to their class in `ql-html-ascii-table': class 0 is never
replaced, classes above 2 are always replaced, and the remaining ones
are replaced only after confirmation.  When an ASCII character is to be
replaced but has no symbolic entity, the numeric form (&#xHH;) is used.
If nothing is replaced, point moves one character forward."
  (interactive)
  (let ((the-char (char-after))
        (do-replace-p nil)
        (entity nil))
    (if (null the-char)
        ; `char-after' returns nil at the end of the accessible region.
        (message "No character at point.")
      ; Only a non-empty string counts as a known symbolic entity.
      (let ((found (gethash the-char ql-html-entity-hash-table)))
        (if (and (stringp found) (not (equal 0 (length found))))
            (setq entity found)))
      (if (< the-char #x80)
          ; Handle ASCII symbols (<= 127) a bit more carefully.
          (let ((r (aref ql-html-ascii-table the-char)))
            (cond
             ((not (> r 0))
              'nil)                     ; class 0: never replace
             ((> r 2)
              (setq do-replace-p 't))   ; classes 3+: always replace
             ((or entity (> r 1))
              (setq do-replace-p
                    (ql-request-confirmation
                     (format "The \"%c\" is an ASCII symbol (codepoint %d < 128). Really replace? (y/n): "
                             the-char the-char)))))
            ; Class 1 without a symbolic entity falls through: no replacement.
            (if (and do-replace-p (null entity))
                ; The numeric form needs the full "&#x" prefix.
                (setq entity (format "&#x%x;" the-char))))
        ; Not an ASCII symbol: replace only by a symbolic entity.
        (if (not (setq do-replace-p (and entity 't)))
            (message "No symbolic entity for \"%c\"." the-char)))
      (if ql-html-debug-p (message "entity: %s" entity)) ; debugging
      (if (not do-replace-p)
          (forward-char)
        ; else: do replace the char by its entity
        (insert entity)
        (delete-char 1)))))

; Bind the function to [Ctrl-%] globally.
(global-set-key (kbd "C-%") 'ql-html-encode-char)
; Bind the function to [Ctrl-%] for html mode only (via `kbd', see above).
(add-hook 'html-mode-hook
  (lambda () (local-set-key (kbd "C-%") (quote ql-html-encode-char)))
)

;------------------------------------------------------------------
; xyz
; xxx
; zyx
;------------------------------------------------------------------

(provide 'ql-html-f)

;------------------------------------------------------------------

(when (and ql-html-debug-p load-in-progress)
  (message "Loading \"%s\"... done" load-file-name)
)

;------------------------------------------------------------------
;
; (setq buffer-file-coding-system 'utf-8)
; (setq coding-system-for-read 'utf-8)
; (setq coding-system-for-write 'utf-8)
;
;;; Local Variables:
;;; mode: emacs-lisp
;;; coding: utf-8
;;; coding-system-for-read: utf-8
;;; coding-system-for-write: utf-8
;;; buffer-file-coding-system: utf-8
;;; End: