Autofill: Improve inferring label from divs
Do not traverse into divs known not to contain inferable labels.
BUG=454366
Review URL: https://codereview.chromium.org/913923004
Cr-Commit-Position: refs/heads/master@{#316172}
(cherry picked from commit 41711d061f0b1ac440e88148e99c883af6df14de)
Review URL: https://codereview.chromium.org/937473002
Cr-Commit-Position: refs/branch-heads/2272@{#311}
Cr-Branched-From: 827a380cfdb31aa54c8d56e63ce2c3fd8c3ba4d4-refs/heads/master@{#310958}
diff --git a/chrome/test/data/autofill/heuristics/input/bug_454366b.html b/chrome/test/data/autofill/heuristics/input/bug_454366b.html
new file mode 100644
index 0000000..32133d4
--- /dev/null
+++ b/chrome/test/data/autofill/heuristics/input/bug_454366b.html
@@ -0,0 +1,81 @@
+<html>
+<head></head>
+<body>
+ <input type="text name="hist_state" id="hist_state" style="display: none;">
+ <div class="g-h-f-ci g-h-f-Au">
+ <div class="g-h-f-Qm" title="Browse people" guidedhelpid="shareboxpeoplepickericon" tabindex="0" style="-webkit-user-select: none;"></div>
+ <div class="g-h-f-vc g-h-f-k" style="">
+ <div class="g-h-f-vc-B b-K b-K-Xb URaP8">
+ <span class="g-h-f-N"><span class="g-h-f-N-Rm" id="sbda">+ Add names, circles, or email addresses</span>
+ <input type="text" class="g-h-f-N-N" id="sbdp" value="" placeholder="+ Add names, circles, or email addresses" aria-label="Add names, circles, or email addresses" tabindex="0" aria-haspopup="false" role="combobox" aria-autocomplete="list">
+ </span>
+ <div class="Dp">
+ <div role="button" aria-label="Posting options" tabindex="0" class="CC tm d-k-l d-r-c" aria-expanded="false" aria-haspopup="true" style="-webkit-user-select: none;">
+ <div class="d-k-l d-r-c-sa-z">
+ <div class="d-k-l d-r-c-aa-z">
+ <div class="d-k-l d-r-c-ha"></div>
+ <div class="d-k-l d-r-c-Qa"> </div>
+ </div>
+ </div>
+ </div>
+ <div class="DC zo" style="display: none;"></div>
+ <div class="yo" style="display: none;"></div>
+ <div class="lt"></div>
+ <div class="EC"></div>
+ <div class="d-r d-r-ih cw" role="menu" aria-haspopup="true" style="display: none; -webkit-user-select: none;">
+ <div class="d-A osa Nq" role="menuitem" id=":4j" style="-webkit-user-select: none;">
+ <div class="d-A-B" style="-webkit-user-select: none;">
+ <div style="-webkit-user-select: none;">Disable comments</div>
+ </div>
+ </div>
+ <div class="d-A osa Qq" role="menuitem" id=":4k" style="-webkit-user-select: none;">
+ <div class="d-A-B" style="-webkit-user-select: none;">
+ <div style="-webkit-user-select: none;">Enable Comments</div>
+ </div>
+ </div>
+ <div class="d-A osa Rq" role="menuitem" id=":4l" style="-webkit-user-select: none;">
+ <div class="d-A-B" style="-webkit-user-select: none;">
+ <div style="-webkit-user-select: none;">Disable reshares</div>
+ </div>
+ </div>
+ <div class="d-A osa Sq" role="menuitem" id=":4m" style="-webkit-user-select: none;">
+ <div class="d-A-B" style="-webkit-user-select: none;">
+ <div style="-webkit-user-select: none;">Enable reshares</div>
+ </div>
+ </div>
+ <div class="d-A osa Oq" role="menuitem" id=":4n" style="-webkit-user-select: none;">
+ <div class="d-A-B" style="-webkit-user-select: none;">
+ <div style="-webkit-user-select: none;">Keep within <b style="-webkit-user-select: none;">Google.com</b>
+ </div>
+ </div>
+ </div>
+ <div class="d-A osa Pq" role="menuitem" id=":4o" style="-webkit-user-select: none;">
+ <div class="d-A-B" style="-webkit-user-select: none;"></div>
+ </div>
+ </div>
+ </div>
+ <span role="list" aria-atomic="true" aria-live="assertive" aria-relevant="all"></span>
+ </div>
+ <div class="g-h-f-V-nb" style="display: none;">
+ <div class="g-h-f-V-bd"></div>
+ <div class="d-r-Gk g-h-f-V b-K b-K-Xb URaP8 g-h-f-V-Lb" role="listbox" id=":4v" style="opacity: 0; -webkit-user-select: none; display: none;">
+ <div class="d-A" id=":4w" role="option" style="-webkit-user-select: none;">
+ <div class="d-A-B" style="-webkit-user-select: none;">
+ <span class="g-h-f-m-Ed-wc-E g-h-f-m" style="-webkit-user-select: none;"></span> <span class="g-h-f-za-B" style="-webkit-user-select: none;">Your circles</span>
+ </div>
+ </div>
+ <div class="d-A" id=":4y" role="option" style="-webkit-user-select: none;">
+ <div class="d-A-B" style="-webkit-user-select: none;">
+ <span class="g-h-f-m-de-E g-h-f-m" style="-webkit-user-select: none;"></span> <span class="g-h-f-za-B" style="-webkit-user-select: none;">Extended circles</span>
+ </div>
+ </div>
+ <div class="d-cm" style="-webkit-user-select: none;"></div>
+ </div>
+ </div>
+ </div>
+ <div class="g-h-f-CJ" style="display: none;">
+ <input type="text" class="g-h-f-iK">
+ </div>
+ </div>
+</body>
+</html>
diff --git a/chrome/test/data/autofill/heuristics/output/08_register_aol.com.out b/chrome/test/data/autofill/heuristics/output/08_register_aol.com.out
index 94ac64a7..7544aaf 100644
--- a/chrome/test/data/autofill/heuristics/output/08_register_aol.com.out
+++ b/chrome/test/data/autofill/heuristics/output/08_register_aol.com.out
@@ -5,8 +5,8 @@
UNKNOWN_TYPE | verifyPasswordHint | Password | Retype password | {actionForm.firstName}_1-default
UNKNOWN_TYPE | {actionForm.verifyPassword} | Password | | {actionForm.firstName}_1-default
UNKNOWN_TYPE | wlw-select_key:{actionForm.dobMonth} | Select Month | | {actionForm.firstName}_1-default
-UNKNOWN_TYPE | {actionForm.dobDay} | Select Month | Day (dd) | {actionForm.firstName}_1-default
-UNKNOWN_TYPE | {actionForm.dobYear} | Select Month | Year (yyyy) | {actionForm.firstName}_1-default
+UNKNOWN_TYPE | {actionForm.dobDay} | | Day (dd) | {actionForm.firstName}_1-default
+UNKNOWN_TYPE | {actionForm.dobYear} | | Year (yyyy) | {actionForm.firstName}_1-default
UNKNOWN_TYPE | wlw-radio_button_group_key:{actionForm.gender} | Female | Female | {actionForm.firstName}_1-default
UNKNOWN_TYPE | wlw-radio_button_group_key:{actionForm.gender} | Male | Male | {actionForm.firstName}_1-default
ADDRESS_HOME_ZIP | {actionForm.zipCode} | Zip Code | | {actionForm.firstName}_1-default
diff --git a/chrome/test/data/autofill/heuristics/output/25_checkout_m_llbean.com.out b/chrome/test/data/autofill/heuristics/output/25_checkout_m_llbean.com.out
index 3f19903..68060b3 100644
--- a/chrome/test/data/autofill/heuristics/output/25_checkout_m_llbean.com.out
+++ b/chrome/test/data/autofill/heuristics/output/25_checkout_m_llbean.com.out
@@ -13,7 +13,7 @@
ADDRESS_HOME_LINE3 | _1_address3 | Address Line 3 (optional) | | _1_personTitle_1-default
ADDRESS_HOME_STATE | _1_zipCode | Enter Zip for City and State | | _1_personTitle_1-default
ADDRESS_HOME_CITY | _1_city | City | | _1_personTitle_1-default
-ADDRESS_HOME_STATE | _1_state | City | | _1_personTitle_1-default
+ADDRESS_HOME_STATE | _1_state | | | _1_personTitle_1-default
ADDRESS_HOME_ZIP | _1_CAPostal | Postal Code | | _1_personTitle_1-default
ADDRESS_HOME_CITY | _1_CACity | City | | _1_personTitle_1-default
ADDRESS_HOME_STATE | _1_CAProvince | Province | | _1_personTitle_1-default
diff --git a/chrome/test/data/autofill/heuristics/output/bug_454366b.out b/chrome/test/data/autofill/heuristics/output/bug_454366b.out
new file mode 100644
index 0000000..8d7d1985
--- /dev/null
+++ b/chrome/test/data/autofill/heuristics/output/bug_454366b.out
@@ -0,0 +1,3 @@
+UNKNOWN_TYPE | hist_state | | | hist_state_1-default
+UNKNOWN_TYPE | sbdp | + Add names, circles, or email addresses | | hist_state_1-default
+UNKNOWN_TYPE | | | | hist_state_1-default
diff --git a/components/autofill/content/renderer/form_autofill_util.cc b/components/autofill/content/renderer/form_autofill_util.cc
index 5c7e2c5..bea44e3 100644
--- a/components/autofill/content/renderer/form_autofill_util.cc
+++ b/components/autofill/content/renderer/form_autofill_util.cc
@@ -5,6 +5,7 @@
#include "components/autofill/content/renderer/form_autofill_util.h"
#include <map>
+#include <set>
#include "base/command_line.h"
#include "base/logging.h"
@@ -169,13 +170,16 @@
// This is a helper function for the FindChildText() function (see below).
// Search depth is limited with the |depth| parameter.
-base::string16 FindChildTextInner(const WebNode& node, int depth) {
+// |divs_to_skip| is the set of <div> nodes to ignore if encountered.
+base::string16 FindChildTextInner(const WebNode& node,
+ int depth,
+ const std::set<WebNode>& divs_to_skip) {
if (depth <= 0 || node.isNull())
return base::string16();
// Skip over comments.
if (node.nodeType() == WebNode::CommentNode)
- return FindChildTextInner(node.nextSibling(), depth - 1);
+ return FindChildTextInner(node.nextSibling(), depth - 1, divs_to_skip);
if (node.nodeType() != WebNode::ElementNode &&
node.nodeType() != WebNode::TextNode)
@@ -191,6 +195,9 @@
IsAutofillableElement(element.toConst<WebFormControlElement>()))) {
return base::string16();
}
+
+ if (element.hasHTMLTagName("div") && ContainsKey(divs_to_skip, node))
+ return base::string16();
}
// Extract the text exactly at this node.
@@ -198,20 +205,38 @@
// Recursively compute the children's text.
// Preserve inter-element whitespace separation.
- base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1);
+ base::string16 child_text =
+ FindChildTextInner(node.firstChild(), depth - 1, divs_to_skip);
bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
// Recursively compute the siblings' text.
// Again, preserve inter-element whitespace separation.
base::string16 sibling_text =
- FindChildTextInner(node.nextSibling(), depth - 1);
+ FindChildTextInner(node.nextSibling(), depth - 1, divs_to_skip);
add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
return node_text;
}
+// Same as FindChildText() below, but with a set of <div> nodes to skip.
+// TODO(thestig): See if other FindChildText() callers can benefit from this.
+base::string16 FindChildTextWithIgnoreList(
+ const WebNode& node,
+ const std::set<WebNode>& divs_to_skip) {
+ if (node.isTextNode())
+ return node.nodeValue();
+
+ WebNode child = node.firstChild();
+
+ const int kChildSearchDepth = 10;
+ base::string16 node_text =
+ FindChildTextInner(child, kChildSearchDepth, divs_to_skip);
+ base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text);
+ return node_text;
+}
+
// Returns the aggregated values of the descendants of |element| that are
// non-empty text nodes. This is a faster alternative to |innerText()| for
// performance critical operations. It does a full depth-first search so can be
@@ -219,15 +244,7 @@
// |innerText()|, the search depth and breadth are limited to a fixed threshold.
// Whitespace is trimmed from text accumulated at descendant nodes.
base::string16 FindChildText(const WebNode& node) {
- if (node.isTextNode())
- return node.nodeValue();
-
- WebNode child = node.firstChild();
-
- const int kChildSearchDepth = 10;
- base::string16 node_text = FindChildTextInner(child, kChildSearchDepth);
- base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text);
- return node_text;
+ return FindChildTextWithIgnoreList(node, std::set<WebNode>());
}
// Shared function for InferLabelFromPrevious() and InferLabelFromNext().
@@ -408,6 +425,7 @@
base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
WebNode node = element.parentNode();
bool looking_for_parent = true;
+ std::set<WebNode> divs_to_skip;
// Search the sibling and parent <div>s until we find a candidate label.
base::string16 inferred_label;
@@ -416,15 +434,21 @@
CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset"));
while (inferred_label.empty() && !node.isNull()) {
if (HasTagName(node, kDiv)) {
- inferred_label = FindChildText(node);
+ if (looking_for_parent)
+ inferred_label = FindChildTextWithIgnoreList(node, divs_to_skip);
+ else
+ inferred_label = FindChildText(node);
+
// Avoid sibling DIVs that contain autofillable fields.
if (!looking_for_parent && !inferred_label.empty()) {
CR_DEFINE_STATIC_LOCAL(WebString, kSelector,
("input, select, textarea"));
blink::WebExceptionCode ec = 0;
WebElement result_element = node.querySelector(kSelector, ec);
- if (!result_element.isNull())
+ if (!result_element.isNull()) {
inferred_label.clear();
+ divs_to_skip.insert(node);
+ }
}
looking_for_parent = false;