unit HtmlTags;
interface
uses
Classes, DomCore;
const
MAX_TAGS_COUNT = 128;
MAX_FLAGS_COUNT = 32;
type
THtmlTagSet = set of 0..MAX_TAGS_COUNT - 1;
THtmlTagFlags = set of 0..MAX_FLAGS_COUNT - 1;
THtmlTag = class
private
FName: TDomString;
FNumber: Integer;
FParserFlags: THtmlTagFlags;
FFormatterFlags: THtmlTagFlags;
public
constructor Create(const AName: TDomString; ANumber: Integer; AParserFlags, AFormatterFlags: THtmlTagFlags);
property Name: TDomString read FName;
property Number: Integer read FNumber;
property ParserFlags: THtmlTagFlags read FParserFlags;
property FormatterFlags: THtmlTagFlags read FFormatterFlags;
end;
TCompareTag = function(Tag: THtmlTag): Integer of object;
THtmlTagList = class
private
FList: TList;
FUnknownTag: THtmlTag;
FSearchName: WideString;
FSearchNumber: Integer;
function CompareName(Tag: THtmlTag): Integer;
function CompareNumber(Tag: THtmlTag): Integer;
function GetTag(Compare: TCompareTag): THtmlTag;
public
constructor Create;
destructor Destroy; override;
function GetTagByName(const Name: TDomString): THtmlTag;
function GetTagByNumber(Number: Integer): THtmlTag;
end;
TURLSchemes = class(TStringList)
private
FMaxLen: Integer;
public
function Add(const S: String): Integer; override;
function IsURL(const S: String): Boolean;
function GetScheme(const S: String): String;
property MaxLen: Integer read FMaxLen;
end;
var
HtmlTagList: THtmlTagList;
URLSchemes: TURLSchemes;
const
UNKNOWN_TAG = 0;
A_TAG = 1;
ABBR_TAG = 2;
ACRONYM_TAG = 3;
ADDRESS_TAG = 4;
APPLET_TAG = 5;
AREA_TAG = 6;
B_TAG = 7;
BASE_TAG = 8;
BASEFONT_TAG = 9;
BDO_TAG = 10;
BIG_TAG = 11;
BLOCKQUOTE_TAG = 12;
BODY_TAG = 13;
BR_TAG = 14;
BUTTON_TAG = 15;
CAPTION_TAG = 16;
CENTER_TAG = 17;
CITE_TAG = 18;
CODE_TAG = 19;
COL_TAG = 20;
COLGROUP_TAG = 21;
DD_TAG = 22;
DEL_TAG = 23;
DFN_TAG = 24;
DIR_TAG = 25;
DIV_TAG = 26;
DL_TAG = 27;
DT_TAG = 28;
EM_TAG = 29;
FIELDSET_TAG = 30;
FONT_TAG = 31;
FORM_TAG = 32;
FRAME_TAG = 33;
FRAMESET_TAG = 34;
H1_TAG = 35;
H2_TAG = 36;
H3_TAG = 37;
H4_TAG = 38;
H5_TAG = 39;
H6_TAG = 40;
HEAD_TAG = 41;
HR_TAG = 42;
HTML_TAG = 43;
I_TAG = 44;
IFRAME_TAG = 45;
IMG_TAG = 46;
INPUT_TAG = 47;
INS_TAG = 48;
ISINDEX_TAG = 49;
KBD_TAG = 50;
LABEL_TAG = 51;
LEGEND_TAG = 52;
LI_TAG = 53;
LINK_TAG = 54;
MAP_TAG = 55;
MENU_TAG = 56;
META_TAG = 57;
NOBR_TAG = 58;
NOFRAMES_TAG = 59;
NOSCRIPT_TAG = 60;
OBJECT_TAG = 61;
OL_TAG = 62;
OPTGROUP_TAG = 63;
OPTION_TAG = 64;
P_TAG = 65;
PARAM_TAG = 66;
PRE_TAG = 67;
Q_TAG = 68;
S_TAG = 69;
SAMP_TAG = 70;
SCRIPT_TAG = 71;
SELECT_TAG = 72;
SMALL_TAG = 73;
SPAN_TAG = 74;
STRIKE_TAG = 75;
STRONG_TAG = 76;
STYLE_TAG = 77;
SUB_TAG = 78;
SUP_TAG = 79;
TABLE_TAG = 80;
TBODY_TAG = 81;
TD_TAG = 82;
TEXTAREA_TAG = 83;
TFOOT_TAG = 84;
TH_TAG = 85;
THEAD_TAG = 86;
TITLE_TAG = 87;
TR_TAG = 88;
TT_TAG = 89;
U_TAG = 90;
UL_TAG = 91;
VAR_TAG = 92;
BlockTags = [ADDRESS_TAG, BLOCKQUOTE_TAG, CENTER_TAG, DIV_TAG, DL_TAG, FIELDSET_TAG, {FORM_TAG,} H1_TAG, H2_TAG, H3_TAG, H4_TAG, H5_TAG, H6_TAG, HR_TAG, NOBR_TAG, NOSCRIPT_TAG, OL_TAG, PRE_TAG, TABLE_TAG, UL_TAG];
BlockParentTags = [ADDRESS_TAG, BLOCKQUOTE_TAG, CENTER_TAG, DIV_TAG, DL_TAG, FIELDSET_TAG, H1_TAG, H2_TAG, H3_TAG, H4_TAG, H5_TAG, H6_TAG, HR_TAG, LI_TAG, NOSCRIPT_TAG, OL_TAG, PRE_TAG, TD_TAG, TH_TAG, UL_TAG];
HeadTags = [BASE_TAG, LINK_TAG, META_TAG, SCRIPT_TAG, STYLE_TAG, TITLE_TAG];
{Elements forbidden from having an end tag, and therefore are empty; from HTML 4.01 spec}
EmptyTags = [AREA_TAG, BASE_TAG, BASEFONT_TAG, BR_TAG, COL_TAG, FRAME_TAG, HR_TAG, IMG_TAG, INPUT_TAG, ISINDEX_TAG, LINK_TAG, META_TAG, PARAM_TAG];
PreserveWhiteSpaceTags = [PRE_TAG];
NeedFindParentTags = [COL_TAG, COLGROUP_TAG, DD_TAG, DT_TAG, LI_TAG, OPTION_TAG, P_TAG, TABLE_TAG, TBODY_TAG, TD_TAG, TFOOT_TAG, TH_TAG, THEAD_TAG, TR_TAG];
ListItemParentTags = [DIR_TAG, MENU_TAG, OL_TAG, UL_TAG];
DefItemParentTags = [DL_TAG];
TableSectionParentTags = [TABLE_TAG];
ColParentTags = [COLGROUP_TAG];
RowParentTags = [TABLE_TAG, TBODY_TAG, TFOOT_TAG, THEAD_TAG];
CellParentTags = [TR_TAG];
OptionParentTags = [OPTGROUP_TAG, SELECT_TAG];
implementation
uses
SysUtils;
constructor THtmlTag.Create(const AName: TDomString; ANumber: Integer; AParserFlags, AFormatterFlags: THtmlTagFlags);
begin
inherited Create;
FName := AName;
FNumber := ANumber
end;
constructor THtmlTagList.Create;
begin
inherited Create;
FList := TList.Create;
FList.Capacity := MAX_TAGS_COUNT;
FList.Add(THtmlTag.Create('a', A_TAG, [], []));
FList.Add(THtmlTag.Create('abbr', ABBR_TAG, [], []));
FList.Add(THtmlTag.Create('acronym', ACRONYM_TAG, [], []));
FList.Add(THtmlTag.Create('address', ADDRESS_TAG, [], []));
FList.Add(THtmlTag.Create('applet', APPLET_TAG, [], []));
FList.Add(THtmlTag.Create('area', AREA_TAG, [], []));
FList.Add(THtmlTag.Create('b', B_TAG, [], []));
FList.Add(THtmlTag.Create('base', BASE_TAG, [], []));
FList.Add(THtmlTag.Create('basefont', BASEFONT_TAG, [], []));
FList.Add(THtmlTag.Create('bdo', BDO_TAG, [], []));
FList.Add(THtmlTag.Create('big', BIG_TAG, [], []));
FList.Add(THtmlTag.Create('blockquote', BLOCKQUOTE_TAG, [], []));
FList.Add(THtmlTag.Create('body', BODY_TAG, [], []));
FList.Add(THtmlTag.Create('br', BR_TAG, [], []));
FList.Add(THtmlTag.Create('button', BUTTON_TAG, [], []));
FList.Add(THtmlTag.Create('caption', CAPTION_TAG, [], []));
FList.Add(THtmlTag.Create('center', CENTER_TAG, [], []));
FList.Add(THtmlTag.Create('cite', CITE_TAG, [], []));
FList.Add(THtmlTag.Create('code', CODE_TAG, [], []));
FList.Add(THtmlTag.Create('col', COL_TAG, [], []));
FList.Add(THtmlTag.Create('colgroup', COLGROUP_TAG, [], []));
FList.Add(THtmlTag.Create('dd', DD_TAG, [], []));
FList.Add(THtmlTag.Create('del', DEL_TAG, [], []));
FList.Add(THtmlTag.Create('dfn', DFN_TAG, [], []));
FList.Add(THtmlTag.Create('dir', DIR_TAG, [], []));
FList.Add(THtmlTag.Create('div', DIV_TAG, [], []));
FList.Add(THtmlTag.Create('dl', DL_TAG, [], []));
FList.Add(THtmlTag.Create('dt', DT_TAG, [], []));
FList.Add(THtmlTag.Create('em', EM_TAG, [], []));
FList.Add(THtmlTag.Create('fieldset', FIELDSET_TAG, [], []));
FList.Add(THtmlTag.Create('font', FONT_TAG, [], []));
FList.Add(THtmlTag.Create('form', FORM_TAG, [], []));
FList.Add(THtmlTag.Create('frame', FRAME_TAG, [], []));
FList.Add(THtmlTag.Create('frameset', FRAMESET_TAG, [], []));
FList.Add(THtmlTag.Create('h1', H1_TAG, [], []));
FList.Add(THtmlTag.Create('h2', H2_TAG, [], []));
FList.Add(THtmlTag.Create('h3', H3_TAG, [], []));
FList.Add(THtmlTag.Create('h4', H4_TAG, [], []));
FList.Add(THtmlTag.Create('h5', H5_TAG, [], []));
FList.Add(THtmlTag.Create('h6', H6_TAG, [], []));
FList.Add(THtmlTag.Create('head', HEAD_TAG, [], []));
FList.Add(THtmlTag.Create('hr', HR_TAG, [], []));
FList.Add(THtmlTag.Create('html', HTML_TAG, [], []));
FList.Add(THtmlTag.Create('i', I_TAG, [], []));
FList.Add(THtmlTag.Create('iframe', IFRAME_TAG, [], []));
FList.Add(THtmlTag.Create('img', IMG_TAG, [], []));
FList.Add(THtmlTag.Create('input', INPUT_TAG, [], []));
FList.Add(THtmlTag.Create('ins', INS_TAG, [], []));
FList.Add(THtmlTag.Create('isindex', ISINDEX_TAG, [], []));
FList.Add(THtmlTag.Create('kbd', KBD_TAG, [], []));
FList.Add(THtmlTag.Create('label', LABEL_TAG, [], []));
FList.Add(THtmlTag.Create('legend', LEGEND_TAG, [], []));
FList.Add(THtmlTag.Create('li', LI_TAG, [], []));
FList.Add(THtmlTag.Create('link', LINK_TAG, [], []));
FList.Add(THtmlTag.Create('map', MAP_TAG, [], []));
FList.Add(THtmlTag.Create('menu', MENU_TAG, [], []));
FList.Add(THtmlTag.Create('meta', META_TAG, [], []));
FList.Add(THtmlTag.Create('nobr', NOBR_TAG, [], []));
FList.Add(THtmlTag.Create('noframes', NOFRAMES_TAG, [], []));
FList.Add(THtmlTag.Create('noscript', NOSCRIPT_TAG, [], []));
FList.Add(THtmlTag.Create('object', OBJECT_TAG, [], []));
FList.Add(THtmlTag.Create('ol', OL_TAG, [], []));
FList.Add(THtmlTag.Create('optgroup', OPTGROUP_TAG, [], []));
FList.Add(THtmlTag.Create('option', OPTION_TAG, [], []));
FList.Add(THtmlTag.Create('p', P_TAG, [], []));
FList.Add(THtmlTag.Create('param', PARAM_TAG, [], []));
FList.Add(THtmlTag.Create('pre', PRE_TAG, [], []));
FList.Add(THtmlTag.Create('q', Q_TAG, [], []));
FList.Add(THtmlTag.Create('s', S_TAG, [], []));
FList.Add(THtmlTag.Create('samp', SAMP_TAG, [], []));
FList.Add(THtmlTag.Create('script', SCRIPT_TAG, [], []));
FList.Add(THtmlTag.Create('select', SELECT_TAG, [], []));
FList.Add(THtmlTag.Create('small', SMALL_TAG, [], []));
FList.Add(THtmlTag.Create('span', SPAN_TAG, [], []));
FList.Add(THtmlTag.Create('strike', STRIKE_TAG, [], []));
FList.Add(THtmlTag.Create('strong', STRONG_TAG, [], []));
FList.Add(THtmlTag.Create('style', STYLE_TAG, [], []));
FList.Add(THtmlTag.Create('sub', SUB_TAG, [], []));
FList.Add(THtmlTag.Create('sup', SUP_TAG, [], []));
FList.Add(THtmlTag.Create('table', TABLE_TAG, [], []));
FList.Add(THtmlTag.Create('tbody', TBODY_TAG, [], []));
FList.Add(THtmlTag.Create('td', TD_TAG, [], []));
FList.Add(THtmlTag.Create('textarea', TEXTAREA_TAG, [], []));
FList.Add(THtmlTag.Create('tfoot', TFOOT_TAG, [], []));
FList.Add(THtmlTag.Create('th', TH_TAG, [], []));
FList.Add(THtmlTag.Create('thead', THEAD_TAG, [], []));
FList.Add(THtmlTag.Create('title', TITLE_TAG, [], []));
FList.Add(THtmlTag.Create('tr', TR_TAG, [], []));
FList.Add(THtmlTag.Create('tt', TT_TAG, [], []));
FList.Add(THtmlTag.Create('u', U_TAG, [], []));
FList.Add(THtmlTag.Create('ul', UL_TAG, [], []));
FList.Add(THtmlTag.Create('var', VAR_TAG, [], []));
FUnknownTag := THtmlTag.Create('', UNKNOWN_TAG, [], [])
end;
destructor THtmlTagList.Destroy;
var
I: Integer;
begin
for I := FList.Count - 1 downto 0 do
THtmlTag(FList[I]).Free;
FList.Free;
FUnknownTag.Free;
inherited Destroy
end;
function THtmlTagList.GetTag(Compare: TCompareTag): THtmlTag;
var
I, Low, High, Rel: Integer;
begin
Low := -1;
High := FList.Count - 1;
while High - Low > 1 do
begin
I := (High + Low) div 2;
Result := FList[I];
Rel := Compare(Result);
if Rel < 0 then
High := I
else
if Rel > 0 then
Low := I
else
Exit
end;
if High >= 0 then
begin
Result := FList[High];
if Compare(Result) = 0 then
Exit
end;
Result := nil
end;
function THtmlTagList.CompareName(Tag: THtmlTag): Integer;
begin
Result := CompareStr(FSearchName, Tag.Name)
end;
function THtmlTagList.CompareNumber(Tag: THtmlTag): Integer;
begin
Result := FSearchNumber - Tag.Number
end;
function THtmlTagList.GetTagByName(const Name: TDomString): THtmlTag;
begin
FSearchName := Name;
Result := GetTag(CompareName);
if Result = nil then
Result := FUnknownTag
end;
function THtmlTagList.GetTagByNumber(Number: Integer): THtmlTag;
begin
FSearchNumber := Number;
Result := GetTag(CompareNumber)
end;
function TURLSchemes.Add(const S: String): Integer;
begin
if Length(S) > FMaxLen then
FMaxLen := Length(S);
Result := inherited Add(S)
end;
function TURLSchemes.IsURL(const S: String): Boolean;
begin
Result := IndexOf(LowerCase(S)) >= 0
end;
function TURLSchemes.GetScheme(const S: String): String;
const
SchemeChars = [Ord('A')..Ord('Z'), Ord('a')..Ord('z')];
var
I: Integer;
begin
Result := '';
for I := 1 to MaxLen + 1 do
begin
if I > Length(S) then
Exit;
if S[I] = ':' then
begin
if IsURL(Copy(S, 1, I - 1)) then
Result := Copy(S, 1, I - 1);
Exit
end
end
end;
initialization
HtmlTagList := THtmlTagList.Create;
URLSchemes := TURLSchemes.Create;
URLSchemes.Add('http');
URLSchemes.Add('https');
URLSchemes.Add('ftp');
URLSchemes.Add('mailto');
URLSchemes.Add('news');
URLSchemes.Add('nntp');
URLSchemes.Add('gopher');
finalization
HtmlTagList.Free;
URLSchemes.Free
end.