| // Copyright 2013 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/autofill/content/renderer/form_autofill_util.h" |
| |
| #include <map> |
| #include <set> |
| |
| #include "base/command_line.h" |
| #include "base/logging.h" |
| #include "base/memory/scoped_vector.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "build/build_config.h" |
| #include "components/autofill/core/common/autofill_data_validation.h" |
| #include "components/autofill/core/common/autofill_regexes.h" |
| #include "components/autofill/core/common/autofill_switches.h" |
| #include "components/autofill/core/common/autofill_util.h" |
| #include "components/autofill/core/common/form_data.h" |
| #include "components/autofill/core/common/form_field_data.h" |
| #include "third_party/WebKit/public/platform/URLConversion.h" |
| #include "third_party/WebKit/public/platform/WebString.h" |
| #include "third_party/WebKit/public/platform/WebVector.h" |
| #include "third_party/WebKit/public/web/WebDocument.h" |
| #include "third_party/WebKit/public/web/WebElement.h" |
| #include "third_party/WebKit/public/web/WebElementCollection.h" |
| #include "third_party/WebKit/public/web/WebFormControlElement.h" |
| #include "third_party/WebKit/public/web/WebFormElement.h" |
| #include "third_party/WebKit/public/web/WebInputElement.h" |
| #include "third_party/WebKit/public/web/WebLabelElement.h" |
| #include "third_party/WebKit/public/web/WebLocalFrame.h" |
| #include "third_party/WebKit/public/web/WebNode.h" |
| #include "third_party/WebKit/public/web/WebOptionElement.h" |
| #include "third_party/WebKit/public/web/WebSelectElement.h" |
| |
| using blink::WebDocument; |
| using blink::WebElement; |
| using blink::WebElementCollection; |
| using blink::WebFormControlElement; |
| using blink::WebFormElement; |
| using blink::WebFrame; |
| using blink::WebInputElement; |
| using blink::WebLabelElement; |
| using blink::WebNode; |
| using blink::WebOptionElement; |
| using blink::WebSelectElement; |
| using blink::WebString; |
| using blink::WebVector; |
| |
| namespace autofill { |
| namespace form_util { |
| |
| const size_t kMaxParseableFields = 200; |
| |
| namespace { |
| |
| // A bit field mask for FillForm functions to not fill some fields. |
| enum FieldFilterMask { |
| FILTER_NONE = 0, |
| FILTER_DISABLED_ELEMENTS = 1 << 0, |
| FILTER_READONLY_ELEMENTS = 1 << 1, |
| // Filters non-focusable elements with the exception of select elements, which |
| // are sometimes made non-focusable because they are present for accessibility |
| // while a prettier, non-<select> dropdown is shown. We still want to autofill |
| // the non-focusable <select>. |
| FILTER_NON_FOCUSABLE_ELEMENTS = 1 << 2, |
| FILTER_ALL_NON_EDITABLE_ELEMENTS = FILTER_DISABLED_ELEMENTS | |
| FILTER_READONLY_ELEMENTS | |
| FILTER_NON_FOCUSABLE_ELEMENTS, |
| }; |
| |
| void TruncateString(base::string16* str, size_t max_length) { |
| if (str->length() > max_length) |
| str->resize(max_length); |
| } |
| |
| bool IsOptionElement(const WebElement& element) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option")); |
| return element.hasHTMLTagName(kOption); |
| } |
| |
| bool IsScriptElement(const WebElement& element) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script")); |
| return element.hasHTMLTagName(kScript); |
| } |
| |
| bool IsNoScriptElement(const WebElement& element) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript")); |
| return element.hasHTMLTagName(kNoScript); |
| } |
| |
| bool HasTagName(const WebNode& node, const blink::WebString& tag) { |
| return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag); |
| } |
| |
| bool IsAutofillableElement(const WebFormControlElement& element) { |
| const WebInputElement* input_element = toWebInputElement(&element); |
| return IsAutofillableInputElement(input_element) || |
| IsSelectElement(element) || |
| IsTextAreaElement(element); |
| } |
| |
| bool IsElementInControlElementSet( |
| const WebElement& element, |
| const std::vector<WebFormControlElement>& control_elements) { |
| if (!element.isFormControlElement()) |
| return false; |
| const WebFormControlElement form_control_element = |
| element.toConst<WebFormControlElement>(); |
| return std::find(control_elements.begin(), |
| control_elements.end(), |
| form_control_element) != control_elements.end(); |
| } |
| |
| bool IsElementInsideFormOrFieldSet(const WebElement& element) { |
| for (WebNode parent_node = element.parentNode(); |
| !parent_node.isNull(); |
| parent_node = parent_node.parentNode()) { |
| if (!parent_node.isElementNode()) |
| continue; |
| |
| WebElement cur_element = parent_node.to<WebElement>(); |
| if (cur_element.hasHTMLTagName("form") || |
| cur_element.hasHTMLTagName("fieldset")) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| // Returns true if |node| is an element and it is a container type that |
| // InferLabelForElement() can traverse. |
| bool IsTraversableContainerElement(const WebNode& node) { |
| if (!node.isElementNode()) |
| return false; |
| |
| const WebElement element = node.toConst<WebElement>(); |
| return element.hasHTMLTagName("dd") || |
| element.hasHTMLTagName("div") || |
| element.hasHTMLTagName("fieldset") || |
| element.hasHTMLTagName("li") || |
| element.hasHTMLTagName("td") || |
| element.hasHTMLTagName("table"); |
| } |
| |
| // Returns the colspan for a <td> / <th>. Defaults to 1. |
| size_t CalculateTableCellColumnSpan(const WebElement& element) { |
| DCHECK(element.hasHTMLTagName("td") || element.hasHTMLTagName("th")); |
| |
| size_t span = 1; |
| if (element.hasAttribute("colspan")) { |
| base::string16 colspan = element.getAttribute("colspan"); |
| // Do not check return value to accept imperfect conversions. |
| base::StringToSizeT(colspan, &span); |
| // Handle overflow. |
| if (span == std::numeric_limits<size_t>::max()) |
| span = 1; |
| span = std::max(span, static_cast<size_t>(1)); |
| } |
| |
| return span; |
| } |
| |
| // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed |
| // to a single space. If |force_whitespace| is true, then the resulting string |
| // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the |
| // result includes a space only if |prefix| has trailing whitespace or |suffix| |
| // has leading whitespace. |
| // A few examples: |
| // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar" |
| // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar" |
| // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar" |
| // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar" |
| // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar" |
| // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar" |
| // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar " |
| // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar " |
| const base::string16 CombineAndCollapseWhitespace( |
| const base::string16& prefix, |
| const base::string16& suffix, |
| bool force_whitespace) { |
| base::string16 prefix_trimmed; |
| base::TrimPositions prefix_trailing_whitespace = |
| base::TrimWhitespace(prefix, base::TRIM_TRAILING, &prefix_trimmed); |
| |
| // Recursively compute the children's text. |
| base::string16 suffix_trimmed; |
| base::TrimPositions suffix_leading_whitespace = |
| base::TrimWhitespace(suffix, base::TRIM_LEADING, &suffix_trimmed); |
| |
| if (prefix_trailing_whitespace || suffix_leading_whitespace || |
| force_whitespace) { |
| return prefix_trimmed + base::ASCIIToUTF16(" ") + suffix_trimmed; |
| } else { |
| return prefix_trimmed + suffix_trimmed; |
| } |
| } |
| |
| // This is a helper function for the FindChildText() function (see below). |
| // Search depth is limited with the |depth| parameter. |
| // |divs_to_skip| is a list of <div> tags to ignore if encountered. |
| base::string16 FindChildTextInner(const WebNode& node, |
| int depth, |
| const std::set<WebNode>& divs_to_skip) { |
| if (depth <= 0 || node.isNull()) |
| return base::string16(); |
| |
| // Skip over comments. |
| if (node.isCommentNode()) |
| return FindChildTextInner(node.nextSibling(), depth - 1, divs_to_skip); |
| |
| if (!node.isElementNode() && !node.isTextNode()) |
| return base::string16(); |
| |
| // Ignore elements known not to contain inferable labels. |
| if (node.isElementNode()) { |
| const WebElement element = node.toConst<WebElement>(); |
| if (IsOptionElement(element) || |
| IsScriptElement(element) || |
| IsNoScriptElement(element) || |
| (element.isFormControlElement() && |
| IsAutofillableElement(element.toConst<WebFormControlElement>()))) { |
| return base::string16(); |
| } |
| |
| if (element.hasHTMLTagName("div") && ContainsKey(divs_to_skip, node)) |
| return base::string16(); |
| } |
| |
| // Extract the text exactly at this node. |
| base::string16 node_text = node.nodeValue(); |
| |
| // Recursively compute the children's text. |
| // Preserve inter-element whitespace separation. |
| base::string16 child_text = |
| FindChildTextInner(node.firstChild(), depth - 1, divs_to_skip); |
| bool add_space = node.isTextNode() && node_text.empty(); |
| node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space); |
| |
| // Recursively compute the siblings' text. |
| // Again, preserve inter-element whitespace separation. |
| base::string16 sibling_text = |
| FindChildTextInner(node.nextSibling(), depth - 1, divs_to_skip); |
| add_space = node.isTextNode() && node_text.empty(); |
| node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space); |
| |
| return node_text; |
| } |
| |
| // Same as FindChildText() below, but with a list of div nodes to skip. |
| // TODO(thestig): See if other FindChildText() callers can benefit from this. |
| base::string16 FindChildTextWithIgnoreList( |
| const WebNode& node, |
| const std::set<WebNode>& divs_to_skip) { |
| if (node.isTextNode()) |
| return node.nodeValue(); |
| |
| WebNode child = node.firstChild(); |
| |
| const int kChildSearchDepth = 10; |
| base::string16 node_text = |
| FindChildTextInner(child, kChildSearchDepth, divs_to_skip); |
| base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text); |
| return node_text; |
| } |
| |
| // Returns the aggregated values of the descendants of |element| that are |
| // non-empty text nodes. This is a faster alternative to |innerText()| for |
| // performance critical operations. It does a full depth-first search so can be |
| // used when the structure is not directly known. However, unlike with |
| // |innerText()|, the search depth and breadth are limited to a fixed threshold. |
| // Whitespace is trimmed from text accumulated at descendant nodes. |
| base::string16 FindChildText(const WebNode& node) { |
| return FindChildTextWithIgnoreList(node, std::set<WebNode>()); |
| } |
| |
| // Shared function for InferLabelFromPrevious() and InferLabelFromNext(). |
| base::string16 InferLabelFromSibling(const WebFormControlElement& element, |
| bool forward) { |
| base::string16 inferred_label; |
| WebNode sibling = element; |
| while (true) { |
| sibling = forward ? sibling.nextSibling() : sibling.previousSibling(); |
| if (sibling.isNull()) |
| break; |
| |
| // Skip over comments. |
| if (sibling.isCommentNode()) |
| continue; |
| |
| // Otherwise, only consider normal HTML elements and their contents. |
| if (!sibling.isElementNode() && !sibling.isTextNode()) |
| break; |
| |
| // A label might be split across multiple "lightweight" nodes. |
| // Coalesce any text contained in multiple consecutive |
| // (a) plain text nodes or |
| // (b) inline HTML elements that are essentially equivalent to text nodes. |
| CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b")); |
| CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong")); |
| CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span")); |
| CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font")); |
| if (sibling.isTextNode() || |
| HasTagName(sibling, kBold) || HasTagName(sibling, kStrong) || |
| HasTagName(sibling, kSpan) || HasTagName(sibling, kFont)) { |
| base::string16 value = FindChildText(sibling); |
| // A text node's value will be empty if it is for a line break. |
| bool add_space = sibling.isTextNode() && value.empty(); |
| inferred_label = |
| CombineAndCollapseWhitespace(value, inferred_label, add_space); |
| continue; |
| } |
| |
| // If we have identified a partial label and have reached a non-lightweight |
| // element, consider the label to be complete. |
| base::string16 trimmed_label; |
| base::TrimWhitespace(inferred_label, base::TRIM_ALL, &trimmed_label); |
| if (!trimmed_label.empty()) |
| break; |
| |
| // <img> and <br> tags often appear between the input element and its |
| // label text, so skip over them. |
| CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img")); |
| CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br")); |
| if (HasTagName(sibling, kImage) || HasTagName(sibling, kBreak)) |
| continue; |
| |
| // We only expect <p> and <label> tags to contain the full label text. |
| CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p")); |
| CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label")); |
| if (HasTagName(sibling, kPage) || HasTagName(sibling, kLabel)) |
| inferred_label = FindChildText(sibling); |
| |
| break; |
| } |
| |
| base::TrimWhitespace(inferred_label, base::TRIM_ALL, &inferred_label); |
| return inferred_label; |
| } |
| |
| // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| // a previous sibling of |element|, |
| // e.g. Some Text <input ...> |
| // or Some <span>Text</span> <input ...> |
| // or <p>Some Text</p><input ...> |
| // or <label>Some Text</label> <input ...> |
| // or Some Text <img><input ...> |
| // or <b>Some Text</b><br/> <input ...>. |
| base::string16 InferLabelFromPrevious(const WebFormControlElement& element) { |
| return InferLabelFromSibling(element, false /* forward? */); |
| } |
| |
| // Same as InferLabelFromPrevious(), but in the other direction. |
| // Useful for cases like: <span><input type="checkbox">Label For Checkbox</span> |
| base::string16 InferLabelFromNext(const WebFormControlElement& element) { |
| return InferLabelFromSibling(element, true /* forward? */); |
| } |
| |
| // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| // the placeholder text. e.g. <input placeholder="foo"> |
| base::string16 InferLabelFromPlaceholder(const WebFormControlElement& element) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kPlaceholder, ("placeholder")); |
| if (element.hasAttribute(kPlaceholder)) |
| return element.getAttribute(kPlaceholder); |
| |
| return base::string16(); |
| } |
| |
| // Helper for |InferLabelForElement()| that infers a label, from |
| // the value attribute when it is present and user has not typed in (if |
| // element's value attribute is same as the element's value). |
| base::string16 InferLabelFromValueAttr(const WebFormControlElement& element) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kValue, ("value")); |
| if (element.hasAttribute(kValue) && element.getAttribute(kValue) == |
| element.value()) { |
| return element.getAttribute(kValue); |
| } |
| |
| return base::string16(); |
| } |
| |
| // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| // enclosing list item, |
| // e.g. <li>Some Text<input ...><input ...><input ...></li> |
| base::string16 InferLabelFromListItem(const WebFormControlElement& element) { |
| WebNode parent = element.parentNode(); |
| CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li")); |
| while (!parent.isNull() && parent.isElementNode() && |
| !parent.to<WebElement>().hasHTMLTagName(kListItem)) { |
| parent = parent.parentNode(); |
| } |
| |
| if (!parent.isNull() && HasTagName(parent, kListItem)) |
| return FindChildText(parent); |
| |
| return base::string16(); |
| } |
| |
| // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| // enclosing label, |
| // e.g. <label>Some Text<input ...><input ...><input ...></label> |
| base::string16 InferLabelFromEnclosingLabel( |
| const WebFormControlElement& element) { |
| WebNode parent = element.parentNode(); |
| CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label")); |
| while (!parent.isNull() && parent.isElementNode() && |
| !parent.to<WebElement>().hasHTMLTagName(kLabel)) { |
| parent = parent.parentNode(); |
| } |
| |
| if (!parent.isNull() && HasTagName(parent, kLabel)) |
| return FindChildText(parent); |
| |
| return base::string16(); |
| } |
| |
| // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| // surrounding table structure, |
| // e.g. <tr><td>Some Text</td><td><input ...></td></tr> |
| // or <tr><th>Some Text</th><td><input ...></td></tr> |
| // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> |
| // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> |
| base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td")); |
| WebNode parent = element.parentNode(); |
| while (!parent.isNull() && parent.isElementNode() && |
| !parent.to<WebElement>().hasHTMLTagName(kTableCell)) { |
| parent = parent.parentNode(); |
| } |
| |
| if (parent.isNull()) |
| return base::string16(); |
| |
| // Check all previous siblings, skipping non-element nodes, until we find a |
| // non-empty text block. |
| base::string16 inferred_label; |
| WebNode previous = parent.previousSibling(); |
| CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th")); |
| while (inferred_label.empty() && !previous.isNull()) { |
| if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader)) |
| inferred_label = FindChildText(previous); |
| |
| previous = previous.previousSibling(); |
| } |
| |
| return inferred_label; |
| } |
| |
| // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| // surrounding table structure, |
| // |
| // If there are multiple cells and the row with the input matches up with the |
| // previous row, then look for a specific cell within the previous row. |
| // e.g. <tr><td>Input 1 label</td><td>Input 2 label</td></tr> |
| // <tr><td><input name="input 1"></td><td><input name="input2"></td></tr> |
| // |
| // Otherwise, just look in the entire previous row. |
| // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr> |
| base::string16 InferLabelFromTableRow(const WebFormControlElement& element) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td")); |
| base::string16 inferred_label; |
| |
| // First find the <td> that contains |element|. |
| WebNode cell = element.parentNode(); |
| while (!cell.isNull()) { |
| if (cell.isElementNode() && |
| cell.to<WebElement>().hasHTMLTagName(kTableCell)) { |
| break; |
| } |
| cell = cell.parentNode(); |
| } |
| |
| // Not in a cell - bail out. |
| if (cell.isNull()) |
| return inferred_label; |
| |
| // Count the cell holding |element|. |
| size_t cell_count = CalculateTableCellColumnSpan(cell.to<WebElement>()); |
| size_t cell_position = 0; |
| size_t cell_position_end = cell_count - 1; |
| |
| // Count cells to the left to figure out |element|'s cell's position. |
| for (WebNode cell_it = cell.previousSibling(); |
| !cell_it.isNull(); |
| cell_it = cell_it.previousSibling()) { |
| if (cell_it.isElementNode() && |
| cell_it.to<WebElement>().hasHTMLTagName(kTableCell)) { |
| cell_position += CalculateTableCellColumnSpan(cell_it.to<WebElement>()); |
| } |
| } |
| |
| // Count cells to the right. |
| for (WebNode cell_it = cell.nextSibling(); |
| !cell_it.isNull(); |
| cell_it = cell_it.nextSibling()) { |
| if (cell_it.isElementNode() && |
| cell_it.to<WebElement>().hasHTMLTagName(kTableCell)) { |
| cell_count += CalculateTableCellColumnSpan(cell_it.to<WebElement>()); |
| } |
| } |
| |
| // Combine left + right. |
| cell_count += cell_position; |
| cell_position_end += cell_position; |
| |
| // Find the current row. |
| CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr")); |
| WebNode parent = element.parentNode(); |
| while (!parent.isNull() && parent.isElementNode() && |
| !parent.to<WebElement>().hasHTMLTagName(kTableRow)) { |
| parent = parent.parentNode(); |
| } |
| |
| if (parent.isNull()) |
| return inferred_label; |
| |
| // Now find the previous row. |
| WebNode row_it = parent.previousSibling(); |
| while (!row_it.isNull()) { |
| if (row_it.isElementNode() && |
| row_it.to<WebElement>().hasHTMLTagName(kTableRow)) { |
| break; |
| } |
| row_it = row_it.previousSibling(); |
| } |
| |
| // If there exists a previous row, check its cells and size. If they align |
| // with the current row, infer the label from the cell above. |
| if (!row_it.isNull()) { |
| WebNode matching_cell; |
| size_t prev_row_count = 0; |
| WebNode prev_row_it = row_it.firstChild(); |
| CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th")); |
| while (!prev_row_it.isNull()) { |
| if (prev_row_it.isElementNode()) { |
| WebElement prev_row_element = prev_row_it.to<WebElement>(); |
| if (prev_row_element.hasHTMLTagName(kTableCell) || |
| prev_row_element.hasHTMLTagName(kTableHeader)) { |
| size_t span = CalculateTableCellColumnSpan(prev_row_element); |
| size_t prev_row_count_end = prev_row_count + span - 1; |
| if (prev_row_count == cell_position && |
| prev_row_count_end == cell_position_end) { |
| matching_cell = prev_row_it; |
| } |
| prev_row_count += span; |
| } |
| } |
| prev_row_it = prev_row_it.nextSibling(); |
| } |
| if ((cell_count == prev_row_count) && !matching_cell.isNull()) { |
| inferred_label = FindChildText(matching_cell); |
| if (!inferred_label.empty()) |
| return inferred_label; |
| } |
| } |
| |
| // If there is no previous row, or if the previous row and current row do not |
| // align, check all previous siblings, skipping non-element nodes, until we |
| // find a non-empty text block. |
| WebNode previous = parent.previousSibling(); |
| while (inferred_label.empty() && !previous.isNull()) { |
| if (HasTagName(previous, kTableRow)) |
| inferred_label = FindChildText(previous); |
| |
| previous = previous.previousSibling(); |
| } |
| |
| return inferred_label; |
| } |
| |
| // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| // a surrounding div table, |
| // e.g. <div>Some Text<span><input ...></span></div> |
| // e.g. <div>Some Text</div><div><input ...></div> |
| // |
| // Because this is already traversing the <div> structure, if it finds a <label> |
| // sibling along the way, infer from that <label>. |
| base::string16 InferLabelFromDivTable(const WebFormControlElement& element) { |
| WebNode node = element.parentNode(); |
| bool looking_for_parent = true; |
| std::set<WebNode> divs_to_skip; |
| |
| // Search the sibling and parent <div>s until we find a candidate label. |
| base::string16 inferred_label; |
| CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div")); |
| CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label")); |
| while (inferred_label.empty() && !node.isNull()) { |
| if (HasTagName(node, kDiv)) { |
| if (looking_for_parent) |
| inferred_label = FindChildTextWithIgnoreList(node, divs_to_skip); |
| else |
| inferred_label = FindChildText(node); |
| |
| // Avoid sibling DIVs that contain autofillable fields. |
| if (!looking_for_parent && !inferred_label.empty()) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kSelector, |
| ("input, select, textarea")); |
| blink::WebExceptionCode ec = 0; |
| WebElement result_element = node.querySelector(kSelector, ec); |
| if (!result_element.isNull()) { |
| inferred_label.clear(); |
| divs_to_skip.insert(node); |
| } |
| } |
| |
| looking_for_parent = false; |
| } else if (!looking_for_parent && HasTagName(node, kLabel)) { |
| WebLabelElement label_element = node.to<WebLabelElement>(); |
| if (label_element.correspondingControl().isNull()) |
| inferred_label = FindChildText(node); |
| } else if (looking_for_parent && IsTraversableContainerElement(node)) { |
| // If the element is in a non-div container, its label most likely is too. |
| break; |
| } |
| |
| if (node.previousSibling().isNull()) { |
| // If there are no more siblings, continue walking up the tree. |
| looking_for_parent = true; |
| } |
| |
| node = looking_for_parent ? node.parentNode() : node.previousSibling(); |
| } |
| |
| return inferred_label; |
| } |
| |
| // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| // a surrounding definition list, |
| // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> |
| // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> |
| base::string16 InferLabelFromDefinitionList( |
| const WebFormControlElement& element) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd")); |
| WebNode parent = element.parentNode(); |
| while (!parent.isNull() && parent.isElementNode() && |
| !parent.to<WebElement>().hasHTMLTagName(kDefinitionData)) |
| parent = parent.parentNode(); |
| |
| if (parent.isNull() || !HasTagName(parent, kDefinitionData)) |
| return base::string16(); |
| |
| // Skip by any intervening text nodes. |
| WebNode previous = parent.previousSibling(); |
| while (!previous.isNull() && previous.isTextNode()) |
| previous = previous.previousSibling(); |
| |
| CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt")); |
| if (previous.isNull() || !HasTagName(previous, kDefinitionTag)) |
| return base::string16(); |
| |
| return FindChildText(previous); |
| } |
| |
| // Returns the element type for all ancestor nodes in CAPS, starting with the |
| // parent node. |
| std::vector<std::string> AncestorTagNames( |
| const WebFormControlElement& element) { |
| std::vector<std::string> tag_names; |
| for (WebNode parent_node = element.parentNode(); |
| !parent_node.isNull(); |
| parent_node = parent_node.parentNode()) { |
| if (!parent_node.isElementNode()) |
| continue; |
| |
| tag_names.push_back(parent_node.to<WebElement>().tagName().utf8()); |
| } |
| return tag_names; |
| } |
| |
| bool IsLabelValid(base::StringPiece16 inferred_label, |
| const std::vector<base::char16>& stop_words) { |
| // If |inferred_label| has any character other than those in |stop_words|. |
| auto first_non_stop_word = std::find_if(inferred_label.begin(), |
| inferred_label.end(), [&stop_words](base::char16 c) { |
| return !ContainsValue(stop_words, c); |
| }); |
| return first_non_stop_word != inferred_label.end(); |
| } |
| |
| // Infers corresponding label for |element| from surrounding context in the DOM, |
| // e.g. the contents of the preceding <p> tag or text element. |
| base::string16 InferLabelForElement(const WebFormControlElement& element, |
| const std::vector<base::char16>& stop_words) { |
| base::string16 inferred_label; |
| |
| if (IsCheckableElement(toWebInputElement(&element))) { |
| inferred_label = InferLabelFromNext(element); |
| if (IsLabelValid(inferred_label, stop_words)) |
| return inferred_label; |
| } |
| |
| inferred_label = InferLabelFromPrevious(element); |
| if (IsLabelValid(inferred_label, stop_words)) |
| return inferred_label; |
| |
| // If we didn't find a label, check for placeholder text. |
| inferred_label = InferLabelFromPlaceholder(element); |
| if (IsLabelValid(inferred_label, stop_words)) |
| return inferred_label; |
| |
| // For all other searches that involve traversing up the tree, the search |
| // order is based on which tag is the closest ancestor to |element|. |
| std::vector<std::string> tag_names = AncestorTagNames(element); |
| std::set<std::string> seen_tag_names; |
| for (const std::string& tag_name : tag_names) { |
| if (ContainsKey(seen_tag_names, tag_name)) |
| continue; |
| |
| seen_tag_names.insert(tag_name); |
| if (tag_name == "LABEL") { |
| inferred_label = InferLabelFromEnclosingLabel(element); |
| } else if (tag_name == "DIV") { |
| inferred_label = InferLabelFromDivTable(element); |
| } else if (tag_name == "TD") { |
| inferred_label = InferLabelFromTableColumn(element); |
| if (!IsLabelValid(inferred_label, stop_words)) |
| inferred_label = InferLabelFromTableRow(element); |
| } else if (tag_name == "DD") { |
| inferred_label = InferLabelFromDefinitionList(element); |
| } else if (tag_name == "LI") { |
| inferred_label = InferLabelFromListItem(element); |
| } else if (tag_name == "FIELDSET") { |
| break; |
| } |
| |
| if (IsLabelValid(inferred_label, stop_words)) |
| return inferred_label; |
| } |
| |
| // If we didn't find a label, check the value attr used as the placeholder. |
| inferred_label = InferLabelFromValueAttr(element); |
| if (IsLabelValid(inferred_label, stop_words)) |
| return inferred_label; |
| else |
| return base::string16(); |
| } |
| |
| // Fills |option_strings| with the values of the <option> elements present in |
| // |select_element|. |
| void GetOptionStringsFromElement(const WebSelectElement& select_element, |
| std::vector<base::string16>* option_values, |
| std::vector<base::string16>* option_contents) { |
| DCHECK(!select_element.isNull()); |
| |
| option_values->clear(); |
| option_contents->clear(); |
| WebVector<WebElement> list_items = select_element.listItems(); |
| |
| // Constrain the maximum list length to prevent a malicious site from DOS'ing |
| // the browser, without entirely breaking autocomplete for some extreme |
| // legitimate sites: http://crbug.com/49332 and http://crbug.com/363094 |
| if (list_items.size() > kMaxListSize) |
| return; |
| |
| option_values->reserve(list_items.size()); |
| option_contents->reserve(list_items.size()); |
| for (size_t i = 0; i < list_items.size(); ++i) { |
| if (IsOptionElement(list_items[i])) { |
| const WebOptionElement option = list_items[i].toConst<WebOptionElement>(); |
| option_values->push_back(option.value()); |
| option_contents->push_back(option.text()); |
| } |
| } |
| } |
| |
| // The callback type used by |ForEachMatchingFormField()|. |
| typedef void (*Callback)(const FormFieldData&, |
| bool, /* is_initiating_element */ |
| blink::WebFormControlElement*); |
| |
| void ForEachMatchingFormFieldCommon( |
| std::vector<WebFormControlElement>* control_elements, |
| const WebElement& initiating_element, |
| const FormData& data, |
| FieldFilterMask filters, |
| bool force_override, |
| const Callback& callback) { |
| DCHECK(control_elements); |
| if (control_elements->size() != data.fields.size()) { |
| // This case should be reachable only for pathological websites and tests, |
| // which add or remove form fields while the user is interacting with the |
| // Autofill popup. |
| return; |
| } |
| |
| // It's possible that the site has injected fields into the form after the |
| // page has loaded, so we can't assert that the size of the cached control |
| // elements is equal to the size of the fields in |form|. Fortunately, the |
| // one case in the wild where this happens, paypal.com signup form, the fields |
| // are appended to the end of the form and are not visible. |
| for (size_t i = 0; i < control_elements->size(); ++i) { |
| WebFormControlElement* element = &(*control_elements)[i]; |
| |
| if (base::string16(element->nameForAutofill()) != data.fields[i].name) { |
| // This case should be reachable only for pathological websites, which |
| // rename form fields while the user is interacting with the Autofill |
| // popup. I (isherman) am not aware of any such websites, and so am |
| // optimistically including a NOTREACHED(). If you ever trip this check, |
| // please file a bug against me. |
| NOTREACHED(); |
| continue; |
| } |
| |
| bool is_initiating_element = (*element == initiating_element); |
| |
| // Only autofill empty fields (or those with the field's default value |
| // attribute) and the field that initiated the filling, i.e. the field the |
| // user is currently editing and interacting with. |
| const WebInputElement* input_element = toWebInputElement(element); |
| CR_DEFINE_STATIC_LOCAL(WebString, kValue, ("value")); |
| CR_DEFINE_STATIC_LOCAL(WebString, kPlaceholder, ("placeholder")); |
| if (!force_override && !is_initiating_element && |
| // A text field, with a non-empty value that is NOT the value of the |
| // input field's "value" or "placeholder" attribute, is skipped. |
| (IsAutofillableInputElement(input_element) || |
| IsTextAreaElement(*element)) && |
| !element->value().isEmpty() && |
| (!element->hasAttribute(kValue) || |
| element->getAttribute(kValue) != element->value()) && |
| (!element->hasAttribute(kPlaceholder) || |
| element->getAttribute(kPlaceholder) != element->value())) |
| continue; |
| |
| if (((filters & FILTER_DISABLED_ELEMENTS) && !element->isEnabled()) || |
| ((filters & FILTER_READONLY_ELEMENTS) && element->isReadOnly()) || |
| // See description for FILTER_NON_FOCUSABLE_ELEMENTS. |
| ((filters & FILTER_NON_FOCUSABLE_ELEMENTS) && !element->isFocusable() && |
| !IsSelectElement(*element))) |
| continue; |
| |
| callback(data.fields[i], is_initiating_element, element); |
| } |
| } |
| |
| // For each autofillable field in |data| that matches a field in the |form|, |
| // the |callback| is invoked with the corresponding |form| field data. |
| void ForEachMatchingFormField(const WebFormElement& form_element, |
| const WebElement& initiating_element, |
| const FormData& data, |
| FieldFilterMask filters, |
| bool force_override, |
| const Callback& callback) { |
| std::vector<WebFormControlElement> control_elements = |
| ExtractAutofillableElementsInForm(form_element); |
| ForEachMatchingFormFieldCommon(&control_elements, initiating_element, data, |
| filters, force_override, callback); |
| } |
| |
| // For each autofillable field in |data| that matches a field in the set of |
| // unowned autofillable form fields, the |callback| is invoked with the |
| // corresponding |data| field. |
| void ForEachMatchingUnownedFormField(const WebElement& initiating_element, |
| const FormData& data, |
| FieldFilterMask filters, |
| bool force_override, |
| const Callback& callback) { |
| if (initiating_element.isNull()) |
| return; |
| |
| std::vector<WebFormControlElement> control_elements = |
| GetUnownedAutofillableFormFieldElements( |
| initiating_element.document().all(), nullptr); |
| if (!IsElementInControlElementSet(initiating_element, control_elements)) |
| return; |
| |
| ForEachMatchingFormFieldCommon(&control_elements, initiating_element, data, |
| filters, force_override, callback); |
| } |
| |
| // Sets the |field|'s value to the value in |data|. |
| // Also sets the "autofilled" attribute, causing the background to be yellow. |
| void FillFormField(const FormFieldData& data, |
| bool is_initiating_node, |
| blink::WebFormControlElement* field) { |
| // Nothing to fill. |
| if (data.value.empty()) |
| return; |
| |
| if (!data.is_autofilled) |
| return; |
| |
| WebInputElement* input_element = toWebInputElement(field); |
| if (IsCheckableElement(input_element)) { |
| input_element->setChecked(data.is_checked, true); |
| } else { |
| base::string16 value = data.value; |
| if (IsTextInput(input_element) || IsMonthInput(input_element)) { |
| // If the maxlength attribute contains a negative value, maxLength() |
| // returns the default maxlength value. |
| TruncateString(&value, input_element->maxLength()); |
| } |
| field->setValue(value, true); |
| } |
| // Setting the form might trigger JavaScript, which is capable of |
| // destroying the frame. |
| if (!field->document().frame()) |
| return; |
| |
| field->setAutofilled(true); |
| |
| if (is_initiating_node && |
| ((IsTextInput(input_element) || IsMonthInput(input_element)) || |
| IsTextAreaElement(*field))) { |
| int length = field->value().length(); |
| field->setSelectionRange(length, length); |
| // Clear the current IME composition (the underline), if there is one. |
| field->document().frame()->unmarkText(); |
| } |
| } |
| |
| // Sets the |field|'s "suggested" (non JS visible) value to the value in |data|. |
| // Also sets the "autofilled" attribute, causing the background to be yellow. |
| void PreviewFormField(const FormFieldData& data, |
| bool is_initiating_node, |
| blink::WebFormControlElement* field) { |
| // Nothing to preview. |
| if (data.value.empty()) |
| return; |
| |
| if (!data.is_autofilled) |
| return; |
| |
| // Preview input, textarea and select fields. For input fields, excludes |
| // checkboxes and radio buttons, as there is no provision for |
| // setSuggestedCheckedValue in WebInputElement. |
| WebInputElement* input_element = toWebInputElement(field); |
| if (IsTextInput(input_element) || IsMonthInput(input_element)) { |
| // If the maxlength attribute contains a negative value, maxLength() |
| // returns the default maxlength value. |
| input_element->setSuggestedValue( |
| data.value.substr(0, input_element->maxLength())); |
| input_element->setAutofilled(true); |
| } else if (IsTextAreaElement(*field) || IsSelectElement(*field)) { |
| field->setSuggestedValue(data.value); |
| field->setAutofilled(true); |
| } |
| |
| if (is_initiating_node && |
| (IsTextInput(input_element) || IsTextAreaElement(*field))) { |
| // Select the part of the text that the user didn't type. |
| PreviewSuggestion(field->suggestedValue(), field->value(), field); |
| } |
| } |
| |
| // Extracts the fields from |control_elements| with |extract_mask| to |
| // |form_fields|. The extracted fields are also placed in |element_map|. |
| // |form_fields| and |element_map| should start out empty. |
| // |fields_extracted| should have as many elements as |control_elements|, |
| // initialized to false. |
| // Returns true if the number of fields extracted is within |
| // [1, kMaxParseableFields]. |
| bool ExtractFieldsFromControlElements( |
| const WebVector<WebFormControlElement>& control_elements, |
| ExtractMask extract_mask, |
| ScopedVector<FormFieldData>* form_fields, |
| std::vector<bool>* fields_extracted, |
| std::map<WebFormControlElement, FormFieldData*>* element_map) { |
| DCHECK(form_fields->empty()); |
| DCHECK(element_map->empty()); |
| DCHECK_EQ(control_elements.size(), fields_extracted->size()); |
| |
| for (size_t i = 0; i < control_elements.size(); ++i) { |
| const WebFormControlElement& control_element = control_elements[i]; |
| |
| if (!IsAutofillableElement(control_element)) |
| continue; |
| |
| // Create a new FormFieldData, fill it out and map it to the field's name. |
| FormFieldData* form_field = new FormFieldData; |
| WebFormControlElementToFormField(control_element, extract_mask, form_field); |
| form_fields->push_back(form_field); |
| (*element_map)[control_element] = form_field; |
| (*fields_extracted)[i] = true; |
| |
| // To avoid overly expensive computation, we impose a maximum number of |
| // allowable fields. |
| if (form_fields->size() > kMaxParseableFields) |
| return false; |
| } |
| |
| // Succeeded if fields were extracted. |
| return !form_fields->empty(); |
| } |
| |
| // For each label element, get the corresponding form control element, use the |
| // form control element's name as a key into the |
| // <WebFormControlElement, FormFieldData> map to find the previously created |
| // FormFieldData and set the FormFieldData's label to the |
| // label.firstChild().nodeValue() of the label element. |
| void MatchLabelsAndFields( |
| const WebElementCollection& labels, |
| std::map<WebFormControlElement, FormFieldData*>* element_map) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for")); |
| CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden")); |
| |
| for (WebElement item = labels.firstItem(); !item.isNull(); |
| item = labels.nextItem()) { |
| WebLabelElement label = item.to<WebLabelElement>(); |
| WebElement control = label.correspondingControl(); |
| FormFieldData* field_data = nullptr; |
| |
| if (control.isNull()) { |
| // Sometimes site authors will incorrectly specify the corresponding |
| // field element's name rather than its id, so we compensate here. |
| base::string16 element_name = label.getAttribute(kFor); |
| if (element_name.empty()) |
| continue; |
| // Look through the list for elements with this name. There can actually |
| // be more than one. In this case, the label may not be particularly |
| // useful, so just discard it. |
| for (const auto& iter : *element_map) { |
| if (iter.second->name == element_name) { |
| if (field_data) { |
| field_data = nullptr; |
| break; |
| } else { |
| field_data = iter.second; |
| } |
| } |
| } |
| } else if (control.isFormControlElement()) { |
| WebFormControlElement form_control = control.to<WebFormControlElement>(); |
| if (form_control.formControlType() == kHidden) |
| continue; |
| // Typical case: look up |field_data| in |element_map|. |
| auto iter = element_map->find(form_control); |
| if (iter == element_map->end()) |
| continue; |
| field_data = iter->second; |
| } |
| |
| if (!field_data) |
| continue; |
| |
| base::string16 label_text = FindChildText(label); |
| |
| // Concatenate labels because some sites might have multiple label |
| // candidates. |
| if (!field_data->label.empty() && !label_text.empty()) |
| field_data->label += base::ASCIIToUTF16(" "); |
| field_data->label += label_text; |
| } |
| } |
| |
| // Common function shared by WebFormElementToFormData() and |
| // UnownedFormElementsAndFieldSetsToFormData(). Either pass in: |
| // 1) |form_element| and an empty |fieldsets|. |
| // or |
| // 2) a NULL |form_element|. |
| // |
| // If |field| is not NULL, then |form_control_element| should be not NULL. |
| bool FormOrFieldsetsToFormData( |
| const blink::WebFormElement* form_element, |
| const blink::WebFormControlElement* form_control_element, |
| const std::vector<blink::WebElement>& fieldsets, |
| const WebVector<WebFormControlElement>& control_elements, |
| ExtractMask extract_mask, |
| FormData* form, |
| FormFieldData* field) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label")); |
| |
| if (form_element) |
| DCHECK(fieldsets.empty()); |
| if (field) |
| DCHECK(form_control_element); |
| |
| // A map from a FormFieldData's name to the FormFieldData itself. |
| std::map<WebFormControlElement, FormFieldData*> element_map; |
| |
| // The extracted FormFields. We use pointers so we can store them in |
| // |element_map|. |
| ScopedVector<FormFieldData> form_fields; |
| |
| // A vector of bools that indicate whether each field in the form meets the |
| // requirements and thus will be in the resulting |form|. |
| std::vector<bool> fields_extracted(control_elements.size(), false); |
| |
| if (!ExtractFieldsFromControlElements(control_elements, extract_mask, |
| &form_fields, &fields_extracted, |
| &element_map)) { |
| return false; |
| } |
| |
| if (form_element) { |
| // Loop through the label elements inside the form element. For each label |
| // element, get the corresponding form control element, use the form control |
| // element's name as a key into the <name, FormFieldData> map to find the |
| // previously created FormFieldData and set the FormFieldData's label to the |
| // label.firstChild().nodeValue() of the label element. |
| WebElementCollection labels = |
| form_element->getElementsByHTMLTagName(kLabel); |
| DCHECK(!labels.isNull()); |
| MatchLabelsAndFields(labels, &element_map); |
| } else { |
| // Same as the if block, but for all the labels in fieldsets. |
| for (size_t i = 0; i < fieldsets.size(); ++i) { |
| WebElementCollection labels = |
| fieldsets[i].getElementsByHTMLTagName(kLabel); |
| DCHECK(!labels.isNull()); |
| MatchLabelsAndFields(labels, &element_map); |
| } |
| } |
| |
| // List of characters a label can't be entirely made of (this list can grow). |
| // Since the term |stop_words| is a known text processing concept we use here |
| // it to refer to such characters. They are not to be confused with words. |
| std::vector<base::char16> stop_words; |
| stop_words.push_back(static_cast<base::char16>(' ')); |
| stop_words.push_back(static_cast<base::char16>('*')); |
| stop_words.push_back(static_cast<base::char16>(':')); |
| stop_words.push_back(static_cast<base::char16>('-')); |
| stop_words.push_back(static_cast<base::char16>(L'\u2013')); |
| stop_words.push_back(static_cast<base::char16>('(')); |
| stop_words.push_back(static_cast<base::char16>(')')); |
| |
| // Loop through the form control elements, extracting the label text from |
| // the DOM. We use the |fields_extracted| vector to make sure we assign the |
| // extracted label to the correct field, as it's possible |form_fields| will |
| // not contain all of the elements in |control_elements|. |
| for (size_t i = 0, field_idx = 0; |
| i < control_elements.size() && field_idx < form_fields.size(); ++i) { |
| // This field didn't meet the requirements, so don't try to find a label |
| // for it. |
| if (!fields_extracted[i]) |
| continue; |
| |
| const WebFormControlElement& control_element = control_elements[i]; |
| if (form_fields[field_idx]->label.empty()) { |
| form_fields[field_idx]->label = InferLabelForElement(control_element, |
| stop_words); |
| } |
| TruncateString(&form_fields[field_idx]->label, kMaxDataLength); |
| |
| if (field && *form_control_element == control_element) |
| *field = *form_fields[field_idx]; |
| |
| ++field_idx; |
| } |
| |
| // Copy the created FormFields into the resulting FormData object. |
| for (const auto& iter : form_fields) |
| form->fields.push_back(*iter); |
| return true; |
| } |
| |
| bool UnownedFormElementsAndFieldSetsToFormData( |
| const std::vector<blink::WebElement>& fieldsets, |
| const std::vector<blink::WebFormControlElement>& control_elements, |
| const blink::WebFormControlElement* element, |
| const blink::WebDocument& document, |
| ExtractMask extract_mask, |
| FormData* form, |
| FormFieldData* field) { |
| form->origin = GetCanonicalOriginForDocument(document); |
| form->is_form_tag = false; |
| |
| return FormOrFieldsetsToFormData(nullptr, element, fieldsets, |
| control_elements, extract_mask, form, field); |
| } |
| |
| GURL StripAuthAndParams(const GURL& gurl) { |
| // We want to keep the path but strip any authentication data, as well as |
| // query and ref portions of URL, for the form action and form origin. |
| GURL::Replacements rep; |
| rep.ClearUsername(); |
| rep.ClearPassword(); |
| rep.ClearQuery(); |
| rep.ClearRef(); |
| return gurl.ReplaceComponents(rep); |
| } |
| |
| } // namespace |
| |
| bool ExtractFormData(const WebFormElement& form_element, FormData* data) { |
| return WebFormElementToFormData( |
| form_element, WebFormControlElement(), |
| static_cast<form_util::ExtractMask>(form_util::EXTRACT_VALUE | |
| form_util::EXTRACT_OPTION_TEXT | |
| form_util::EXTRACT_OPTIONS), |
| data, NULL); |
| } |
| |
| bool IsFormVisible(blink::WebFrame* frame, |
| const GURL& canonical_action, |
| const GURL& canonical_origin, |
| const FormData& form_data) { |
| const GURL frame_origin = GetCanonicalOriginForDocument(frame->document()); |
| blink::WebVector<WebFormElement> forms; |
| frame->document().forms(forms); |
| |
| #if !defined(OS_ANDROID) |
| // Omitting the action attribute would result in |canonical_origin| for |
| // hierarchical schemes like http:, and in an empty URL for non-hierarchical |
| // schemes like about: or data: etc. |
| const bool action_is_empty = canonical_action.is_empty() |
| || canonical_action == canonical_origin; |
| #endif |
| |
| // Since empty or unspecified action fields are automatically set to page URL, |
| // action field for forms cannot be used for comparing (all forms with |
| // empty/unspecified actions have the same value). If an action field is set |
| // to the page URL, this method checks ALL fields of the form instead (using |
| // FormData.SameFormAs). This is also true if the action was set to the page |
| // URL on purpose. |
| for (const WebFormElement& form : forms) { |
| if (!AreFormContentsVisible(form)) |
| continue; |
| |
| GURL iter_canonical_action = GetCanonicalActionForForm(form); |
| #if !defined(OS_ANDROID) |
| bool form_action_is_empty = iter_canonical_action.is_empty() || |
| iter_canonical_action == frame_origin; |
| if (action_is_empty != form_action_is_empty) |
| continue; |
| |
| if (action_is_empty) { // Both actions are empty, compare all fields. |
| FormData extracted_form_data; |
| WebFormElementToFormData(form, WebFormControlElement(), EXTRACT_NONE, |
| &extracted_form_data, nullptr); |
| if (form_data.SameFormAs(extracted_form_data)) { |
| return true; // Form still exists. |
| } |
| } else { // Both actions are non-empty, compare actions only. |
| if (canonical_action == iter_canonical_action) { |
| return true; // Form still exists. |
| } |
| } |
| #else // OS_ANDROID |
| if (canonical_action == iter_canonical_action) { |
| return true; // Form still exists. |
| } |
| #endif |
| } |
| |
| return false; |
| } |
| |
| bool IsSomeControlElementVisible( |
| const WebVector<WebFormControlElement>& control_elements) { |
| for (const WebFormControlElement& control_element : control_elements) { |
| if (IsWebNodeVisible(control_element)) |
| return true; |
| } |
| return false; |
| } |
| |
| bool AreFormContentsVisible(const WebFormElement& form) { |
| WebVector<WebFormControlElement> control_elements; |
| form.getFormControlElements(control_elements); |
| return IsSomeControlElementVisible(control_elements); |
| } |
| |
| GURL GetCanonicalActionForForm(const WebFormElement& form) { |
| WebString action = form.action(); |
| if (action.isNull()) |
| action = WebString(""); // missing 'action' attribute implies current URL. |
| GURL full_action(form.document().completeURL(action)); |
| return StripAuthAndParams(full_action); |
| } |
| |
| GURL GetCanonicalOriginForDocument(const WebDocument& document) { |
| GURL full_origin(document.url()); |
| return StripAuthAndParams(full_origin); |
| } |
| |
| bool IsMonthInput(const WebInputElement* element) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kMonth, ("month")); |
| return element && !element->isNull() && element->formControlType() == kMonth; |
| } |
| |
| // All text fields, including password fields, should be extracted. |
| bool IsTextInput(const WebInputElement* element) { |
| return element && !element->isNull() && element->isTextField(); |
| } |
| |
| bool IsSelectElement(const WebFormControlElement& element) { |
| // Static for improved performance. |
| CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one")); |
| return !element.isNull() && element.formControlType() == kSelectOne; |
| } |
| |
| bool IsTextAreaElement(const WebFormControlElement& element) { |
| // Static for improved performance. |
| CR_DEFINE_STATIC_LOCAL(WebString, kTextArea, ("textarea")); |
| return !element.isNull() && element.formControlType() == kTextArea; |
| } |
| |
| bool IsCheckableElement(const WebInputElement* element) { |
| if (!element || element->isNull()) |
| return false; |
| |
| return element->isCheckbox() || element->isRadioButton(); |
| } |
| |
| bool IsAutofillableInputElement(const WebInputElement* element) { |
| return IsTextInput(element) || |
| IsMonthInput(element) || |
| IsCheckableElement(element); |
| } |
| |
| const base::string16 GetFormIdentifier(const WebFormElement& form) { |
| base::string16 identifier = form.name(); |
| CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id")); |
| if (identifier.empty()) |
| identifier = form.getAttribute(kId); |
| |
| return identifier; |
| } |
| |
| bool IsWebNodeVisible(const blink::WebNode& node) { |
| // TODO(esprehn): This code doesn't really check if the node is visible, just |
| // if the node takes up space in the layout. Does it want to check opacity, |
| // transform, and visibility too? |
| if (!node.isElementNode()) |
| return false; |
| const WebElement element = node.toConst<WebElement>(); |
| return element.hasNonEmptyLayoutSize(); |
| } |
| |
| std::vector<blink::WebFormControlElement> ExtractAutofillableElementsFromSet( |
| const WebVector<WebFormControlElement>& control_elements) { |
| std::vector<blink::WebFormControlElement> autofillable_elements; |
| for (size_t i = 0; i < control_elements.size(); ++i) { |
| WebFormControlElement element = control_elements[i]; |
| if (!IsAutofillableElement(element)) |
| continue; |
| |
| autofillable_elements.push_back(element); |
| } |
| return autofillable_elements; |
| } |
| |
| std::vector<WebFormControlElement> ExtractAutofillableElementsInForm( |
| const WebFormElement& form_element) { |
| WebVector<WebFormControlElement> control_elements; |
| form_element.getFormControlElements(control_elements); |
| |
| return ExtractAutofillableElementsFromSet(control_elements); |
| } |
| |
| void WebFormControlElementToFormField(const WebFormControlElement& element, |
| ExtractMask extract_mask, |
| FormFieldData* field) { |
| DCHECK(field); |
| DCHECK(!element.isNull()); |
| CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete")); |
| CR_DEFINE_STATIC_LOCAL(WebString, kRole, ("role")); |
| CR_DEFINE_STATIC_LOCAL(WebString, kPlaceholder, ("placeholder")); |
| |
| // The label is not officially part of a WebFormControlElement; however, the |
| // labels for all form control elements are scraped from the DOM and set in |
| // WebFormElementToFormData. |
| field->name = element.nameForAutofill(); |
| field->form_control_type = element.formControlType().utf8(); |
| field->autocomplete_attribute = element.getAttribute(kAutocomplete).utf8(); |
| if (field->autocomplete_attribute.size() > kMaxDataLength) { |
| // Discard overly long attribute values to avoid DOS-ing the browser |
| // process. However, send over a default string to indicate that the |
| // attribute was present. |
| field->autocomplete_attribute = "x-max-data-length-exceeded"; |
| } |
| if (base::LowerCaseEqualsASCII( |
| base::StringPiece16(element.getAttribute(kRole)), "presentation")) |
| field->role = FormFieldData::ROLE_ATTRIBUTE_PRESENTATION; |
| |
| field->placeholder = element.getAttribute(kPlaceholder); |
| |
| if (!IsAutofillableElement(element)) |
| return; |
| |
| const WebInputElement* input_element = toWebInputElement(&element); |
| if (IsAutofillableInputElement(input_element) || |
| IsTextAreaElement(element) || |
| IsSelectElement(element)) { |
| field->is_autofilled = element.isAutofilled(); |
| field->is_focusable = element.isFocusable(); |
| field->should_autocomplete = element.autoComplete(); |
| field->text_direction = element.directionForFormData() == |
| "rtl" ? base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT; |
| } |
| |
| if (IsAutofillableInputElement(input_element)) { |
| if (IsTextInput(input_element)) |
| field->max_length = input_element->maxLength(); |
| |
| field->is_checkable = IsCheckableElement(input_element); |
| field->is_checked = input_element->isChecked(); |
| } else if (IsTextAreaElement(element)) { |
| // Nothing more to do in this case. |
| } else if (extract_mask & EXTRACT_OPTIONS) { |
| // Set option strings on the field if available. |
| DCHECK(IsSelectElement(element)); |
| const WebSelectElement select_element = element.toConst<WebSelectElement>(); |
| GetOptionStringsFromElement(select_element, |
| &field->option_values, |
| &field->option_contents); |
| } |
| |
| if (!(extract_mask & EXTRACT_VALUE)) |
| return; |
| |
| base::string16 value = element.value(); |
| |
| if (IsSelectElement(element) && (extract_mask & EXTRACT_OPTION_TEXT)) { |
| const WebSelectElement select_element = element.toConst<WebSelectElement>(); |
| // Convert the |select_element| value to text if requested. |
| WebVector<WebElement> list_items = select_element.listItems(); |
| for (size_t i = 0; i < list_items.size(); ++i) { |
| if (IsOptionElement(list_items[i])) { |
| const WebOptionElement option_element = |
| list_items[i].toConst<WebOptionElement>(); |
| if (option_element.value() == value) { |
| value = option_element.text(); |
| break; |
| } |
| } |
| } |
| } |
| |
| // Constrain the maximum data length to prevent a malicious site from DOS'ing |
| // the browser: http://crbug.com/49332 |
| TruncateString(&value, kMaxDataLength); |
| |
| field->value = value; |
| } |
| |
| bool WebFormElementToFormData( |
| const blink::WebFormElement& form_element, |
| const blink::WebFormControlElement& form_control_element, |
| ExtractMask extract_mask, |
| FormData* form, |
| FormFieldData* field) { |
| const WebFrame* frame = form_element.document().frame(); |
| if (!frame) |
| return false; |
| |
| form->name = GetFormIdentifier(form_element); |
| form->origin = GetCanonicalOriginForDocument(frame->document()); |
| form->action = frame->document().completeURL(form_element.action()); |
| |
| // If the completed URL is not valid, just use the action we get from |
| // WebKit. |
| if (!form->action.is_valid()) |
| form->action = GURL(blink::WebStringToGURL(form_element.action())); |
| |
| WebVector<WebFormControlElement> control_elements; |
| form_element.getFormControlElements(control_elements); |
| |
| std::vector<blink::WebElement> dummy_fieldset; |
| return FormOrFieldsetsToFormData(&form_element, &form_control_element, |
| dummy_fieldset, control_elements, |
| extract_mask, form, field); |
| } |
| |
| std::vector<WebFormControlElement> GetUnownedFormFieldElements( |
| const WebElementCollection& elements, |
| std::vector<WebElement>* fieldsets) { |
| std::vector<WebFormControlElement> unowned_fieldset_children; |
| for (WebElement element = elements.firstItem(); |
| !element.isNull(); |
| element = elements.nextItem()) { |
| if (element.isFormControlElement()) { |
| WebFormControlElement control = element.to<WebFormControlElement>(); |
| if (control.form().isNull()) |
| unowned_fieldset_children.push_back(control); |
| } |
| |
| if (fieldsets && element.hasHTMLTagName("fieldset") && |
| !IsElementInsideFormOrFieldSet(element)) { |
| fieldsets->push_back(element); |
| } |
| } |
| return unowned_fieldset_children; |
| } |
| |
| std::vector<WebFormControlElement> GetUnownedAutofillableFormFieldElements( |
| const WebElementCollection& elements, |
| std::vector<WebElement>* fieldsets) { |
| return ExtractAutofillableElementsFromSet( |
| GetUnownedFormFieldElements(elements, fieldsets)); |
| } |
| |
| bool UnownedCheckoutFormElementsAndFieldSetsToFormData( |
| const std::vector<blink::WebElement>& fieldsets, |
| const std::vector<blink::WebFormControlElement>& control_elements, |
| const blink::WebFormControlElement* element, |
| const blink::WebDocument& document, |
| ExtractMask extract_mask, |
| FormData* form, |
| FormFieldData* field) { |
| // Only attempt formless Autofill on checkout flows. This avoids the many |
| // false positives found on the non-checkout web. See |
| // http://crbug.com/462375. |
| WebElement html_element = document.documentElement(); |
| |
| // For now this restriction only applies to English-language pages, because |
| // the keywords are not translated. Note that an empty "lang" attribute |
| // counts as English. |
| std::string lang; |
| if (!html_element.isNull()) |
| lang = html_element.getAttribute("lang").utf8(); |
| if (!lang.empty() && |
| !base::StartsWith(lang, "en", base::CompareCase::INSENSITIVE_ASCII)) { |
| return UnownedFormElementsAndFieldSetsToFormData( |
| fieldsets, control_elements, element, document, extract_mask, form, |
| field); |
| } |
| |
| // A potential problem is that this only checks document.title(), but should |
| // actually check the main frame's title. Thus it may make bad decisions for |
| // iframes. |
| base::string16 title(base::ToLowerASCII(base::string16(document.title()))); |
| |
| // Don't check the path for url's without a standard format path component, |
| // such as data:. |
| std::string path; |
| GURL url(document.url()); |
| if (url.IsStandard()) |
| path = base::ToLowerASCII(url.path()); |
| |
| const char* const kKeywords[] = { |
| "payment", |
| "checkout", |
| "address", |
| "delivery", |
| "shipping", |
| "wallet" |
| }; |
| |
| for (const auto& keyword : kKeywords) { |
| // Compare char16 elements of |title| with char elements of |keyword| using |
| // operator==. |
| auto title_pos = std::search(title.begin(), title.end(), |
| keyword, keyword + strlen(keyword)); |
| if (title_pos != title.end() || |
| path.find(keyword) != std::string::npos) { |
| form->is_formless_checkout = true; |
| // Found a keyword: treat this as an unowned form. |
| return UnownedFormElementsAndFieldSetsToFormData( |
| fieldsets, control_elements, element, document, extract_mask, form, |
| field); |
| } |
| } |
| |
| // Since it's not a checkout flow, only add fields that have a non-"off" |
| // autocomplete attribute to the formless autofill. |
| CR_DEFINE_STATIC_LOCAL(WebString, kOffAttribute, ("off")); |
| std::vector<WebFormControlElement> elements_with_autocomplete; |
| for (const WebFormControlElement& element : control_elements) { |
| blink::WebString autocomplete = element.getAttribute("autocomplete"); |
| if (autocomplete.length() && autocomplete != kOffAttribute) |
| elements_with_autocomplete.push_back(element); |
| } |
| |
| if (elements_with_autocomplete.empty()) |
| return false; |
| |
| return UnownedFormElementsAndFieldSetsToFormData( |
| fieldsets, elements_with_autocomplete, element, document, extract_mask, |
| form, field); |
| } |
| |
| bool UnownedPasswordFormElementsAndFieldSetsToFormData( |
| const std::vector<blink::WebElement>& fieldsets, |
| const std::vector<blink::WebFormControlElement>& control_elements, |
| const blink::WebFormControlElement* element, |
| const blink::WebDocument& document, |
| ExtractMask extract_mask, |
| FormData* form, |
| FormFieldData* field) { |
| return UnownedFormElementsAndFieldSetsToFormData( |
| fieldsets, control_elements, element, document, extract_mask, form, |
| field); |
| } |
| |
| |
| bool FindFormAndFieldForFormControlElement(const WebFormControlElement& element, |
| FormData* form, |
| FormFieldData* field) { |
| if (!IsAutofillableElement(element)) |
| return false; |
| |
| ExtractMask extract_mask = |
| static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS); |
| const WebFormElement form_element = element.form(); |
| if (form_element.isNull()) { |
| // No associated form, try the synthetic form for unowned form elements. |
| WebDocument document = element.document(); |
| std::vector<WebElement> fieldsets; |
| std::vector<WebFormControlElement> control_elements = |
| GetUnownedAutofillableFormFieldElements(document.all(), &fieldsets); |
| return UnownedCheckoutFormElementsAndFieldSetsToFormData( |
| fieldsets, control_elements, &element, document, extract_mask, |
| form, field); |
| } |
| |
| return WebFormElementToFormData(form_element, |
| element, |
| extract_mask, |
| form, |
| field); |
| } |
| |
| void FillForm(const FormData& form, const WebFormControlElement& element) { |
| WebFormElement form_element = element.form(); |
| if (form_element.isNull()) { |
| ForEachMatchingUnownedFormField(element, |
| form, |
| FILTER_ALL_NON_EDITABLE_ELEMENTS, |
| false, /* dont force override */ |
| &FillFormField); |
| return; |
| } |
| |
| ForEachMatchingFormField(form_element, |
| element, |
| form, |
| FILTER_ALL_NON_EDITABLE_ELEMENTS, |
| false, /* dont force override */ |
| &FillFormField); |
| } |
| |
| void FillFormIncludingNonFocusableElements(const FormData& form_data, |
| const WebFormElement& form_element) { |
| if (form_element.isNull()) { |
| NOTREACHED(); |
| return; |
| } |
| |
| FieldFilterMask filter_mask = static_cast<FieldFilterMask>( |
| FILTER_DISABLED_ELEMENTS | FILTER_READONLY_ELEMENTS); |
| ForEachMatchingFormField(form_element, |
| WebInputElement(), |
| form_data, |
| filter_mask, |
| true, /* force override */ |
| &FillFormField); |
| } |
| |
| void PreviewForm(const FormData& form, const WebFormControlElement& element) { |
| WebFormElement form_element = element.form(); |
| if (form_element.isNull()) { |
| ForEachMatchingUnownedFormField(element, |
| form, |
| FILTER_ALL_NON_EDITABLE_ELEMENTS, |
| false, /* dont force override */ |
| &PreviewFormField); |
| return; |
| } |
| |
| ForEachMatchingFormField(form_element, |
| element, |
| form, |
| FILTER_ALL_NON_EDITABLE_ELEMENTS, |
| false, /* dont force override */ |
| &PreviewFormField); |
| } |
| |
| bool ClearPreviewedFormWithElement(const WebFormControlElement& element, |
| bool was_autofilled) { |
| WebFormElement form_element = element.form(); |
| std::vector<WebFormControlElement> control_elements; |
| if (form_element.isNull()) { |
| control_elements = GetUnownedAutofillableFormFieldElements( |
| element.document().all(), nullptr); |
| if (!IsElementInControlElementSet(element, control_elements)) |
| return false; |
| } else { |
| control_elements = ExtractAutofillableElementsInForm(form_element); |
| } |
| |
| for (size_t i = 0; i < control_elements.size(); ++i) { |
| // There might be unrelated elements in this form which have already been |
| // auto-filled. For example, the user might have already filled the address |
| // part of a form and now be dealing with the credit card section. We only |
| // want to reset the auto-filled status for fields that were previewed. |
| WebFormControlElement control_element = control_elements[i]; |
| |
| // Only text input, textarea and select elements can be previewed. |
| WebInputElement* input_element = toWebInputElement(&control_element); |
| if (!IsTextInput(input_element) && |
| !IsMonthInput(input_element) && |
| !IsTextAreaElement(control_element) && |
| !IsSelectElement(control_element)) |
| continue; |
| |
| // If the element is not auto-filled, we did not preview it, |
| // so there is nothing to reset. |
| if (!control_element.isAutofilled()) |
| continue; |
| |
| if ((IsTextInput(input_element) || |
| IsMonthInput(input_element) || |
| IsTextAreaElement(control_element) || |
| IsSelectElement(control_element)) && |
| control_element.suggestedValue().isEmpty()) |
| continue; |
| |
| // Clear the suggested value. For the initiating node, also restore the |
| // original value. |
| if (IsTextInput(input_element) || IsMonthInput(input_element) || |
| IsTextAreaElement(control_element)) { |
| control_element.setSuggestedValue(WebString()); |
| bool is_initiating_node = (element == control_element); |
| if (is_initiating_node) { |
| control_element.setAutofilled(was_autofilled); |
| // Clearing the suggested value in the focused node (above) can cause |
| // selection to be lost. We force selection range to restore the text |
| // cursor. |
| int length = control_element.value().length(); |
| control_element.setSelectionRange(length, length); |
| } else { |
| control_element.setAutofilled(false); |
| } |
| } else if (IsSelectElement(control_element)) { |
| control_element.setSuggestedValue(WebString()); |
| control_element.setAutofilled(false); |
| } |
| } |
| |
| return true; |
| } |
| |
| bool IsWebpageEmpty(const blink::WebFrame* frame) { |
| blink::WebDocument document = frame->document(); |
| |
| return IsWebElementEmpty(document.head()) && |
| IsWebElementEmpty(document.body()); |
| } |
| |
| bool IsWebElementEmpty(const blink::WebElement& root) { |
| CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script")); |
| CR_DEFINE_STATIC_LOCAL(WebString, kMeta, ("meta")); |
| CR_DEFINE_STATIC_LOCAL(WebString, kTitle, ("title")); |
| |
| if (root.isNull()) |
| return true; |
| |
| for (WebNode child = root.firstChild(); |
| !child.isNull(); |
| child = child.nextSibling()) { |
| if (child.isTextNode() && |
| !base::ContainsOnlyChars(child.nodeValue().utf8(), |
| base::kWhitespaceASCII)) |
| return false; |
| |
| if (!child.isElementNode()) |
| continue; |
| |
| WebElement element = child.to<WebElement>(); |
| if (!element.hasHTMLTagName(kScript) && |
| !element.hasHTMLTagName(kMeta) && |
| !element.hasHTMLTagName(kTitle)) |
| return false; |
| } |
| return true; |
| } |
| |
| void PreviewSuggestion(const base::string16& suggestion, |
| const base::string16& user_input, |
| blink::WebFormControlElement* input_element) { |
| size_t selection_start = user_input.length(); |
| if (IsFeatureSubstringMatchEnabled()) { |
| size_t offset = GetTextSelectionStart(suggestion, user_input, false); |
| // Zero selection start is for password manager, which can show usernames |
| // that do not begin with the user input value. |
| selection_start = (offset == base::string16::npos) ? 0 : offset; |
| } |
| |
| input_element->setSelectionRange(selection_start, suggestion.length()); |
| } |
| |
| } // namespace form_util |
| } // namespace autofill |